bespoke 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -19,11 +19,12 @@ Gem::Specification.new do |gem|
19
19
 
20
20
  gem.add_development_dependency "bundler", ">= 1.0.0"
21
21
  gem.add_development_dependency "rspec", "~> 2.6"
22
- gem.add_development_dependency "debugger"
22
+ gem.add_development_dependency 'guard'
23
+ gem.add_development_dependency 'guard-rspec'
23
24
 
24
25
  gem.add_dependency 'rake'
25
- gem.add_dependency 'docile'
26
26
  gem.add_dependency 'mustache'
27
27
  gem.add_dependency 'sqlite3'
28
28
  gem.add_dependency 'sequel'
29
+ gem.add_dependency 'activesupport'
29
30
  end
@@ -1,6 +1,20 @@
1
1
  require "bespoke/version"
2
2
 
3
+ module Bespoke
4
+ class Error < StandardError
5
+ unless instance_methods.include? :cause
6
+ attr_reader :cause
7
+
8
+ def initialize(msg, cause = $!)
9
+ super(msg)
10
+ @cause = cause
11
+ end
12
+ end
13
+ end
14
+ end
15
+
3
16
  require "bespoke/template"
4
17
  require "bespoke/xsltproc"
5
18
  require "bespoke/projection"
6
19
  require "bespoke/join"
20
+ require 'bespoke/export'
@@ -1,6 +1,5 @@
1
1
  require "bespoke"
2
- require "docile"
3
2
 
4
3
  def projection(name, xpath_root=nil, &block)
5
- Docile.dsl_eval(Bespoke::Projection.new(name, xpath_root), &block)
6
- end
4
+ Bespoke::Projection.new(name, xpath_root, &block)
5
+ end
@@ -0,0 +1,7 @@
1
+ module Bespoke
2
+ class Export
3
+
4
+ end
5
+ end
6
+
7
+ require_relative 'export/filter'
@@ -0,0 +1,154 @@
1
+ require_relative 'filter/select_template'
2
+
3
+ module Bespoke
4
+ class Export
5
+ class Filter
6
+ attr_reader :templates, :data_set
7
+
8
+ # Constructs a new filter
9
+ #
10
+ # @param data_set [Sequel::Dataset] a Sequel::Dataset; this is generally
11
+ # obtained using #[] on an instance of a Sequel::Database, but it can also
12
+ # be the output of another filter's `#apply` method.
13
+ #
14
+ # @param templates [Hash{Symbol => String, Array, Hash{String, Symbol => String, Symbol}}]
15
+ # A hash of hashes specifying columns/aliases for select clause output,
16
+ # joins to perform (only performs left joins right now, this needs some
17
+ # work) and finally a where clause to filter the results. The selects,
18
+ # joins and where keys MUST be symbols, the nested hashes (when used)
19
+ # MAY have either String or Symbol keys.
20
+ #
21
+ # :selects key ==
22
+ # The selects key is most useful as a hash, the keys will get used as aliases
23
+ # for the specified template strings or symbol column definitions.
24
+ #
25
+ # The following examples cover most use cases:
26
+ # ```
27
+ # # basic column specifications
28
+ # { first_name: :students__first_name } # => `students`.`first_name` AS 'first_name'
29
+ # { 'first_name' => '<students.first_name>' } # => `students`.`first_name` AS 'first_name'
30
+ # { 'first_name' => :first_name } # => `first_name` AS 'first_name'
31
+ #
32
+ # # generate arbitrary output strings:
33
+ # { 'user_id' => 'student_<students.id>' } # => ('student_' || `students`.`id`) AS 'user_id'
34
+ #
35
+ # # use periods in table/column names by quoting
36
+ # { 'first_name' => '<students."name.first_name">' } # => `students`.`name.first_name` AS 'first_name'
37
+ #
38
+ # # use backticks as quotes
39
+ # { 'first_name' => '<students.`name.first_name`>' } # => `students`.`name.first_name` AS 'first_name'
40
+ #
41
+ # # coalesce values using a pipe `|` and supply a default value using single quotes
42
+ # { 'course_name' => "<staff.admin_username | staff.teacher_username | 'default_value'>" }
43
+ # # => COALESCE(`staff`.`admin_username`, `staff`.`teacher_username`, 'default_value') AS 'course_name'
44
+ #
45
+ # # use SQL functions (N.B. this currently does no manipulation on arguments so they remain strings)
46
+ # { 'course_length' => "<length('foobar')>" } # => LENGTH('foobar') AS 'course_length'
47
+ #
48
+ # # delimited arrays are specially handled using concat_ws (which rejects NULLS before concatenating the results):
49
+ # { 'course_name' => '<courses.name>-<terms.abbreviation>-<staff."name.last_name">' }
50
+ # # => concat_ws(`courses`.`name`, `terms`.`abbreviation`, `staff`.`name.last_name`, '-') AS 'course_name'
51
+ #
52
+ # # in delimited arrays the delimiter can be any arbitrary sequence:
53
+ # { 'course_name' => '<courses.name>=+=<terms.abbreviation>=+=<staff."name.last_name">' }
54
+ # # => concat_ws(`courses`.`name`, `terms`.`abbreviation`, `staff`.`name.last_name`, '=+=') AS 'course_name'
55
+ # ```
56
+ #
57
+ # :joins key ==
58
+ # The joins key is somewhat less flexible in what it will accept and
59
+ # generate but it still meets current needs.
60
+ #
61
+ # ```
62
+ # # in hash specifications symbols and strings are interchangeable
63
+ # # assuming we're selecting from a table named `foos`
64
+ # { bars: [:id, :foo_id] } # => LEFT JOIN `bars` ON `foos`.`id` = `bars`.`foo_id`
65
+ # [:bars, :quxes] # => LEFT JOIN `bars` LEFT JOIN `quxes`
66
+ # ```
67
+ #
68
+ # :where key ==
69
+ # This is even more limited than the joins key and I don't anticipate
70
+ # it being extended because for these transforms there isn't much
71
+ # filtering to be done.
72
+ # This key will only accept a string that gets inserted directly into
73
+ # the SQL, after extensive testing this doesn't appear to present any
74
+ # SQLi potential since only the first statement gets executed and no
75
+ # additional information can be pulled into the query at this point
76
+ # due to this being the last segment we support. A user can still,
77
+ # however, cause the query to be invalid or reduce the amount of data
78
+ # returned.
79
+
80
+ def initialize(data_set, templates)
81
+ @data_set = data_set
82
+ @set_name = data_set.opts[:from].first
83
+ @templates = templates
84
+ end
85
+
86
+ # Applies the selects, joins and where clause to the data set and returns a
87
+ # new Dataset which can be interacted with using Enumerable methods
88
+ # as well as any other methods on Sequel::Dataset
89
+ def apply
90
+ ast = apply_selects(data_set, templates[:selects], templates[:functions])
91
+ ast = apply_joins(ast, templates[:joins])
92
+ apply_where(ast, templates[:where])
93
+ end
94
+
95
+ # #sql gives access to the SQL generated by applying the filters to the
96
+ # dataset, this is mostly for diagnostics and testing since other code
97
+ # can interact with the dataset returned from #apply using Enumerable
98
+ # methods
99
+ def sql
100
+ apply.sql
101
+ end
102
+
103
+ private
104
+
105
+ # TODO: figure out how to allow specification of what type of join we're doing
106
+ # as well as being able to join tables to ones we've already joined, maybe
107
+ # we can do something like:
108
+ # {table_three: ['<table_two.id>', '<table_three.two_id>', 'inner']}
109
+ # with the last part being optional with the default being left
110
+ def apply_joins(ast, joins)
111
+ return ast unless joins
112
+ case joins
113
+ when String
114
+ ast.join(joins)
115
+ when Array
116
+ joins.inject(ast) {|ast, join|
117
+ ast.left_join(join)
118
+ }
119
+ when Hash
120
+ joins.inject(ast) {|ast, (joined_table, columns)|
121
+ ast.left_join(joined_table, Sequel.qualify(@set_name, columns[0]) => Sequel.qualify(joined_table, columns[1]))
122
+ }
123
+ else
124
+ raise "Unsupported type (#{ joins.class }) in joins key for filter template"
125
+ end
126
+ end
127
+
128
+ def apply_selects(ast, selects, functions)
129
+ return ast unless selects
130
+ case selects
131
+ when String
132
+ ast.select(selects)
133
+ when Hash
134
+ compiled_selects = selects.map do |(select_alias, template_string)|
135
+ SelectTemplate.compile(select_alias, template_string, functions || {})
136
+ end
137
+ ast.select(*compiled_selects)
138
+ else
139
+ raise "Unsupported type (#{ selects.class }) in selects key for filter template"
140
+ end
141
+ end
142
+
143
+ def apply_where(ast, where)
144
+ return ast unless where
145
+ case where
146
+ when String
147
+ ast.where(where)
148
+ else
149
+ raise "Unsupported type (#{ where.class }) in where key for filter template"
150
+ end
151
+ end
152
+ end
153
+ end
154
+ end
@@ -0,0 +1,61 @@
1
+ require 'active_support/core_ext/hash'
2
+
3
+ module Bespoke
4
+ class Export
5
+ class Filter
6
+ class FunctionCall
7
+ EXTRACTION_REGEXP = /\A(?<function>[^\(]+)\((?<args>[^\)]*)\)\z/
8
+
9
+ DEFAULT_CUSTOM_FUNCTIONS = {
10
+ 'random_str' => ->(args) { "LOWER(HEX(RANDOMBLOB(#{ args[0].to_i })))" },
11
+ }.with_indifferent_access
12
+
13
+ attr_reader :custom_functions
14
+
15
+ def self.match?(string)
16
+ !!EXTRACTION_REGEXP.match(string.strip)
17
+ end
18
+
19
+ def self.extract_from(string, custom_functions={})
20
+ new(string, custom_functions).extract
21
+ end
22
+
23
+ def initialize(string, custom_functions={})
24
+ @string = string.strip
25
+ @custom_functions = DEFAULT_CUSTOM_FUNCTIONS.merge(custom_functions)
26
+ end
27
+
28
+ def extract
29
+ if custom_function?
30
+ Sequel.lit(compile_custom_function)
31
+ else
32
+ Sequel.function(function.upcase, *arguments)
33
+ end
34
+ end
35
+
36
+ def function
37
+ call_parts[:function]
38
+ end
39
+
40
+ # TODO: Make this type cast integers, booleans and possibly identifiers
41
+ def arguments
42
+ @arguments ||= call_parts[:args].split(',').map(&:strip)
43
+ end
44
+
45
+ private
46
+
47
+ def compile_custom_function
48
+ @custom_functions[function.downcase].call(arguments)
49
+ end
50
+
51
+ def custom_function?
52
+ @custom_functions.keys.include? function.downcase
53
+ end
54
+
55
+ def call_parts
56
+ @function_parts ||= EXTRACTION_REGEXP.match(@string)
57
+ end
58
+ end
59
+ end
60
+ end
61
+ end
@@ -0,0 +1,66 @@
1
+ require 'strscan'
2
+
3
+ module Bespoke
4
+ class Export
5
+ class Filter
6
+ class Identifier
7
+ def self.extract_from(value)
8
+ new(value).sequel_identifier
9
+ end
10
+
11
+ def initialize(unparsed_value)
12
+ @unparsed_value = unparsed_value.dup.strip.freeze
13
+ end
14
+
15
+ def sequel_identifier
16
+ table.nil? ? Sequel.identifier(column) : Sequel.qualify(table, column)
17
+ end
18
+
19
+ def column
20
+ parts[:column]
21
+ end
22
+
23
+ def table
24
+ parts[:table]
25
+ end
26
+
27
+ QUOTED_PART_EXTRACTION_REGEXP = /(?<quote>["`])(?<value>\S+?)\k<quote>/
28
+ def parts
29
+ return @parts if @parts
30
+ if quoted_identifier?
31
+ parts_array = extract_parts_from_quoted_identifier
32
+ else
33
+ parts_array = @unparsed_value.split('.')
34
+ end
35
+ @parts = {
36
+ table: (parts_array.length == 1 ? nil : parts_array.first),
37
+ column: parts_array.last,
38
+ }
39
+ end
40
+
41
+ private
42
+
43
+ def quoted_identifier?
44
+ !!QUOTED_PART_EXTRACTION_REGEXP.match(@unparsed_value)
45
+ end
46
+
47
+ def extract_parts_from_quoted_identifier
48
+ scanner = StringScanner.new(@unparsed_value)
49
+ template = @unparsed_value.dup
50
+ identifiers = []
51
+
52
+ while scanner.scan_until(QUOTED_PART_EXTRACTION_REGEXP)
53
+ quoted_part = scanner[0]
54
+ bare_identifier = scanner[2]
55
+ identifiers << bare_identifier
56
+ template.sub!(quoted_part, identifiers.index(bare_identifier).to_s)
57
+ end
58
+
59
+ parts_array = template.split('.').map{|part|
60
+ /\A\d+\z/.match(part) ? identifiers[part.to_i] : part
61
+ }
62
+ end
63
+ end
64
+ end
65
+ end
66
+ end
@@ -0,0 +1,109 @@
1
+ %w{function_call identifier string_literal}.each do |file|
2
+ require_relative file
3
+ end
4
+
5
+ module Bespoke
6
+ class Export
7
+ class Filter
8
+ class SelectTemplate
9
+ attr_accessor :template_string, :select_alias
10
+
11
+ TEMPLATE_CHECK_REGEXP = /<[^>]+>/
12
+
13
+ def self.compile(select_alias, template_string, custom_functions={})
14
+ new(select_alias, template_string, custom_functions).compile
15
+ end
16
+
17
+ def initialize(select_alias, template_string, custom_functions={})
18
+ @select_alias = select_alias
19
+ @template_string = template_string
20
+ @custom_functions = custom_functions
21
+ end
22
+
23
+ def compile
24
+ return Sequel.as(template_string, select_alias) if simple_string_template?
25
+ return column_definitions.first.as(select_alias) if single_column_template?
26
+ return Sequel.function(:concat_ws, *column_definitions, array_separator).as(select_alias) if joined_array_template?
27
+
28
+ concatenated_literals_and_columns
29
+ end
30
+
31
+ private
32
+
33
+ def array_separator
34
+ unique_string_literal_parts.first
35
+ end
36
+
37
+ def column_definitions
38
+ @column_definitions ||= extract_columns_from_template
39
+ end
40
+
41
+ def complex_template?
42
+ !!TEMPLATE_CHECK_REGEXP.match(template_string)
43
+ end
44
+
45
+ def concatenated_literals_and_columns
46
+ joined_parts = split_template_parts.zip(column_definitions).flatten.reject{|part| part == '' || part.nil?}
47
+
48
+ first_part = (joined_parts.first.is_a?(String) ? Sequel.expr(joined_parts.shift) : joined_parts.shift)
49
+
50
+ joined_parts.inject(first_part) {|ast, part|
51
+ ast + part
52
+ }.as(select_alias)
53
+ end
54
+
55
+ def extract_columns_from_template
56
+ template_string.scan(TEMPLATE_CHECK_REGEXP).map {|template_part|
57
+ unbracketed_template = template_part[1..-2].strip
58
+ identifiers = extract_identifiers(unbracketed_template)
59
+ (identifiers.length == 1 ? identifiers[0] : Sequel.function(:COALESCE, *identifiers))
60
+ }
61
+ end
62
+
63
+ def extract_identifiers(template_part)
64
+ template_part.split(/\s*\|\s*/).map {|string|
65
+ if StringLiteral.match?(string)
66
+ StringLiteral.extract_from(string)
67
+ elsif FunctionCall.match?(string)
68
+ FunctionCall.extract_from(string, @custom_functions)
69
+ else
70
+ Identifier.extract_from(string)
71
+ end
72
+ }
73
+ end
74
+
75
+ def joined_array_template?
76
+ single_unique_string_literal_part? && multiple_column_definitions?
77
+ end
78
+
79
+ def multiple_column_definitions?
80
+ column_definitions.size > 1
81
+ end
82
+
83
+ def simple_string_template?
84
+ !complex_template?
85
+ end
86
+
87
+ def single_column_template?
88
+ column_definitions.size == 1 && template_string_literals.empty?
89
+ end
90
+
91
+ def single_unique_string_literal_part?
92
+ unique_string_literal_parts.size == 1
93
+ end
94
+
95
+ def split_template_parts
96
+ @split_template_parts ||= template_string.split(TEMPLATE_CHECK_REGEXP)
97
+ end
98
+
99
+ def template_string_literals
100
+ @template_string_literals ||= split_template_parts.reject{|part| part == ""}
101
+ end
102
+
103
+ def unique_string_literal_parts
104
+ @unique_string_literal_parts ||= template_string_literals.uniq
105
+ end
106
+ end
107
+ end
108
+ end
109
+ end