bespoke 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,11 +19,12 @@ Gem::Specification.new do |gem|
19
19
 
20
20
  gem.add_development_dependency "bundler", ">= 1.0.0"
21
21
  gem.add_development_dependency "rspec", "~> 2.6"
22
- gem.add_development_dependency "debugger"
22
+ gem.add_development_dependency 'guard'
23
+ gem.add_development_dependency 'guard-rspec'
23
24
 
24
25
  gem.add_dependency 'rake'
25
- gem.add_dependency 'docile'
26
26
  gem.add_dependency 'mustache'
27
27
  gem.add_dependency 'sqlite3'
28
28
  gem.add_dependency 'sequel'
29
+ gem.add_dependency 'activesupport'
29
30
  end
@@ -1,6 +1,20 @@
1
1
  require "bespoke/version"
2
2
 
3
module Bespoke
  # Base error class for Bespoke.
  #
  # An Error can carry the exception that caused it. The constructor accepts
  # an optional second argument on every Ruby version, so
  # `Bespoke::Error.new("msg", original_error)` behaves the same whether or
  # not the running Ruby provides Exception#cause natively.
  class Error < StandardError
    if instance_methods.include?(:cause)
      # Ruby already tracks the cause when the error is raised from inside a
      # rescue block; we only remember a cause that was handed in explicitly.
      #
      # @param msg [String, nil] the error message
      # @param cause [Exception, nil] an explicit cause, overriding the one
      #   Ruby records at raise time
      def initialize(msg = nil, cause = nil)
        super(msg)
        @explicit_cause = cause
      end

      # @return [Exception, nil] the explicitly supplied cause if there is
      #   one, otherwise the cause recorded by Ruby
      def cause
        @explicit_cause || super
      end
    else
      # Rubies without Exception#cause: emulate it, defaulting to the
      # exception currently being handled ($!).
      attr_reader :cause

      # @param msg [String] the error message
      # @param cause [Exception, nil] the underlying exception
      def initialize(msg, cause = $!)
        super(msg)
        @cause = cause
      end
    end
  end
end
15
+
3
16
  require "bespoke/template"
4
17
  require "bespoke/xsltproc"
5
18
  require "bespoke/projection"
6
19
  require "bespoke/join"
20
+ require 'bespoke/export'
@@ -1,6 +1,5 @@
1
1
  require "bespoke"
2
- require "docile"
3
2
 
4
3
# Top-level DSL entry point: builds a Bespoke::Projection for +name+,
# optionally rooted at +xpath_root+, and hands the given block straight to
# the Projection constructor.
def projection(name, xpath_root=nil, &definition)
  Bespoke::Projection.new(name, xpath_root, &definition)
end
@@ -0,0 +1,7 @@
1
module Bespoke
  # Namespace class for export functionality. It defines no behaviour of its
  # own; nested classes are attached to it from other files.
  class Export; end
end
6
+
7
+ require_relative 'export/filter'
@@ -0,0 +1,154 @@
1
+ require_relative 'filter/select_template'
2
+
3
module Bespoke
  class Export
    # Applies a declarative template (select columns, joins and a where
    # clause) to a Sequel dataset and returns the resulting dataset.
    class Filter
      attr_reader :templates, :data_set

      # Constructs a new filter
      #
      # @param data_set [Sequel::Dataset] a Sequel::Dataset, this is generally
      #   obtained using #[] on an instance of a Sequel::Database but can also
      #   be the output of another filter's `#apply` method.
      #
      # @param templates [Hash{Symbol => String, Array, Hash{String, Symbol => String, Symbol}}]
      #   A hash of hashes specifying columns/aliases for select clause output,
      #   joins to perform (array and hash joins are left joins only right now,
      #   this needs some work) and finally a where clause to filter the
      #   results. The selects, joins and where keys MUST be symbols, the
      #   nested hashes (when used) MAY have either String or Symbol keys.
      #   An optional :functions key is passed through to the select template
      #   compiler as custom functions.
      #
      # :selects key ==
      #   The selects key is most useful as a hash, the keys will get used as aliases
      #   for the specified template strings or symbol column definitions.
      #
      #   The following examples cover most use cases:
      #   ```
      #   # basic column specifications
      #   { first_name: :students__first_name }       # => `students`.`first_name` AS 'first_name'
      #   { 'first_name' => '<students.first_name>' } # => `students`.`first_name` AS 'first_name'
      #   { 'first_name' => :first_name }             # => `first_name` AS 'first_name'
      #
      #   # generate arbitrary output strings:
      #   { 'user_id' => 'student_<students.id>' } # => ('student_' || `students`.`id`) AS 'user_id'
      #
      #   # use periods in table/column names by quoting
      #   { 'first_name' => '<students."name.first_name">' } # => `students`.`name.first_name` AS 'first_name'
      #
      #   # use backticks as quotes
      #   { 'first_name' => '<students.`name.first_name`>' } # => `students`.`name.first_name` AS 'first_name'
      #
      #   # coalesce values using a pipe `|` and supply a default value using single quotes
      #   { 'course_name' => "<staff.admin_username | staff.teacher_username | 'default_value'>" }
      #   # => COALESCE(`staff`.`admin_username`, `staff`.`teacher_username`, 'default_value') AS 'course_name'
      #
      #   # use SQL functions (N.B. this currently does no manipulation on arguments so they remain strings)
      #   { 'course_length' => "<length('foobar')>" } # => LENGTH('foobar') AS 'course_length'
      #
      #   # delimited arrays are specially handled using concat_ws (which rejects NULLS before concatenating the results):
      #   { 'course_name' => '<courses.name>-<terms.abbreviation>-<staff."name.last_name">' }
      #   # => concat_ws(`courses`.`name`, `terms`.`abbreviation`, `staff`.`name.last_name`, '-') AS 'course_name'
      #
      #   # in delimited arrays the delimiter can be any arbitrary sequence:
      #   { 'course_name' => '<courses.name>=+=<terms.abbreviation>=+=<staff."name.last_name">' }
      #   # => concat_ws(`courses`.`name`, `terms`.`abbreviation`, `staff`.`name.last_name`, '=+=') AS 'course_name'
      #   ```
      #
      # :joins key ==
      #   The joins key is somewhat less flexible in what it will accept and
      #   generate but it still meets current needs.
      #
      #   ```
      #   # in hash specifications symbols and strings are interchangeable
      #   # assuming we're selecting from a table named `foos`
      #   { bars: [:id, :foo_id] } # => LEFT JOIN `bars` ON `foos`.`id` = `bars`.`foo_id`
      #   [:bars, :quxes]          # => LEFT JOIN `bars` LEFT JOIN `quxes`
      #   ```
      #
      #   A plain String is passed to the dataset's `join` (not `left_join`).
      #
      # :where key ==
      #   This is even more limited than the joins key and I don't anticipate
      #   it being extended because for these transforms there isn't much
      #   filtering to be done.
      #   This key will only accept a string that gets inserted directly into
      #   the SQL, after extensive testing this doesn't appear to present any
      #   SQLi potential since only the first statement gets executed and no
      #   additional information can be pulled into the query at this point
      #   due to this being the last segment we support. A user can still,
      #   however, cause the query to be invalid or reduce the amount of data
      #   returned.
      #   NOTE(review): the string is still raw SQL; confirm it never comes
      #   from untrusted input.
      def initialize(data_set, templates)
        @data_set = data_set
        # The first FROM source qualifies the left-hand column of hash joins.
        @set_name = data_set.opts[:from].first
        @templates = templates
      end

      # Applies the selects, joins and where clause to the dataset and returns
      # a new Dataset which can be interacted with using Enumerable methods
      # as well as any other methods on Sequel::Dataset
      def apply
        selected = apply_selects(data_set, templates[:selects], templates[:functions])
        joined = apply_joins(selected, templates[:joins])
        apply_where(joined, templates[:where])
      end

      # #sql gives access to the SQL generated by applying the filters to the
      # dataset, this is mostly for diagnostics and testing since other code
      # can interact with the dataset returned from #apply using Enumerable
      # methods
      def sql
        apply.sql
      end

      private

      # TODO: figure out how to allow specification of what type of join we're doing
      # as well as being able to join tables to ones we've already joined, maybe
      # we can do something like:
      #   {table_three: ['<table_two.id>', '<table_three.two_id>', 'inner']}
      # with the last part being optional with the default being left
      #
      # Returns +ast+ untouched when no joins are given; raises RuntimeError
      # for anything other than a String, Array or Hash.
      def apply_joins(ast, joins)
        return ast unless joins
        case joins
        when String
          ast.join(joins)
        when Array
          joins.inject(ast) { |joined, table| joined.left_join(table) }
        when Hash
          joins.inject(ast) do |joined, (joined_table, columns)|
            joined.left_join(joined_table, Sequel.qualify(@set_name, columns[0]) => Sequel.qualify(joined_table, columns[1]))
          end
        else
          raise "Unsupported type (#{ joins.class }) in joins key for filter template"
        end
      end

      # Applies the select list. A String is handed to `select` as is; a Hash
      # is compiled entry by entry (alias => template) through SelectTemplate.
      # Returns +ast+ untouched when no selects are given; raises RuntimeError
      # for any other type.
      def apply_selects(ast, selects, functions)
        return ast unless selects
        case selects
        when String
          ast.select(selects)
        when Hash
          custom_functions = functions || {}
          compiled_selects = selects.map do |(select_alias, template_string)|
            SelectTemplate.compile(select_alias, template_string, custom_functions)
          end
          ast.select(*compiled_selects)
        else
          raise "Unsupported type (#{ selects.class }) in selects key for filter template"
        end
      end

      # Applies the where clause. Only a String is supported; it is passed
      # to `where` unchanged. Returns +ast+ untouched when no clause is given.
      def apply_where(ast, where)
        return ast unless where
        case where
        when String
          ast.where(where)
        else
          raise "Unsupported type (#{ where.class }) in where key for filter template"
        end
      end
    end
  end
end
@@ -0,0 +1,61 @@
1
+ require 'active_support/core_ext/hash'
2
+
3
module Bespoke
  class Export
    class Filter
      # Turns a `name(arg, ...)` fragment of a select template into a Sequel
      # expression. Names found in the custom function table are rendered by
      # their lambda and wrapped with Sequel.lit; everything else becomes a
      # Sequel.function call with the upper-cased name and the raw string
      # arguments.
      class FunctionCall
        # Whole-string match: a name (anything but "("), then a parenthesised
        # argument list that contains no ")".
        EXTRACTION_REGEXP = /\A(?<function>[^\(]+)\((?<args>[^\)]*)\)\z/

        # Built-in custom functions, keyed by lower-case name. Each lambda
        # receives the array of string arguments and returns SQL text.
        DEFAULT_CUSTOM_FUNCTIONS = {
          'random_str' => ->(args) { "LOWER(HEX(RANDOMBLOB(#{ args[0].to_i })))" },
        }.with_indifferent_access

        attr_reader :custom_functions

        # @return [Boolean] whether the stripped string looks like a function call
        def self.match?(string)
          !EXTRACTION_REGEXP.match(string.strip).nil?
        end

        # Convenience wrapper: parse +string+ and return its Sequel expression.
        def self.extract_from(string, custom_functions={})
          new(string, custom_functions).extract
        end

        # @param string [String] the function call text (surrounding whitespace is ignored)
        # @param custom_functions [Hash] extra or overriding custom functions,
        #   merged on top of DEFAULT_CUSTOM_FUNCTIONS
        def initialize(string, custom_functions={})
          @string = string.strip
          @custom_functions = DEFAULT_CUSTOM_FUNCTIONS.merge(custom_functions)
        end

        # Builds the Sequel expression for this call.
        def extract
          return Sequel.lit(compile_custom_function) if custom_function?

          Sequel.function(function.upcase, *arguments)
        end

        # @return [String] the function name exactly as written
        def function
          call_parts[:function]
        end

        # TODO: Make this type cast integers, booleans and possibly identifiers
        #
        # @return [Array<String>] the comma separated arguments, each stripped
        def arguments
          @arguments ||= call_parts[:args].split(',').map { |argument| argument.strip }
        end

        private

        # Renders the SQL text by calling the registered lambda with the arguments.
        def compile_custom_function
          renderer = @custom_functions[function.downcase]
          renderer.call(arguments)
        end

        # Custom functions are looked up by lower-cased name.
        def custom_function?
          @custom_functions.key?(function.downcase)
        end

        # Memoised match of the call text against EXTRACTION_REGEXP.
        def call_parts
          @function_parts ||= EXTRACTION_REGEXP.match(@string)
        end
      end
    end
  end
end
@@ -0,0 +1,66 @@
1
+ require 'strscan'
2
+
3
module Bespoke
  class Export
    class Filter
      # Parses a `column` or `table.column` reference from a select template
      # and converts it to a Sequel identifier. Parts may be wrapped in double
      # quotes or backticks so that they can contain periods, e.g.
      # `students."name.first_name"`.
      class Identifier
        # A part wrapped in matching double quotes or backticks; the quoted
        # value may not contain whitespace.
        QUOTED_PART_EXTRACTION_REGEXP = /(?<quote>["`])(?<value>\S+?)\k<quote>/

        # Convenience wrapper: parse +value+ and return its Sequel identifier.
        def self.extract_from(value)
          new(value).sequel_identifier
        end

        # @param unparsed_value [String] the raw reference; surrounding
        #   whitespace is ignored
        def initialize(unparsed_value)
          # String#strip already returns a new string, so the caller's value
          # is never frozen or mutated.
          @unparsed_value = unparsed_value.strip.freeze
        end

        # @return a plain Sequel identifier when there is no table part,
        #   otherwise a table-qualified one
        def sequel_identifier
          table.nil? ? Sequel.identifier(column) : Sequel.qualify(table, column)
        end

        # @return [String, nil] the last part of the reference
        def column
          parts[:column]
        end

        # @return [String, nil] the first part of the reference, or nil when
        #   the reference consists of a single part
        def table
          parts[:table]
        end

        # @return [Hash{Symbol => String, nil}] the memoised :table and :column parts
        def parts
          @parts ||= begin
            names = quoted_identifier? ? extract_parts_from_quoted_identifier : @unparsed_value.split('.')
            {
              table: (names.length == 1 ? nil : names.first),
              column: names.last,
            }
          end
        end

        private

        def quoted_identifier?
          !QUOTED_PART_EXTRACTION_REGEXP.match(@unparsed_value).nil?
        end

        # Splits a reference that contains quoted parts. The string is
        # tokenised in a single pass: a quoted part yields its unquoted value
        # (periods included), any other run of non-period characters is a
        # bare part. Tokenising directly avoids substituting numeric
        # placeholders, which confused all-digit bare parts with placeholders.
        def extract_parts_from_quoted_identifier
          tokens = @unparsed_value.scan(/(?<quote>["`])(?<value>\S+?)\k<quote>|(?<bare>[^.]+)/)
          tokens.map { |_quote, quoted, bare| quoted || bare }
        end
      end
    end
  end
end
@@ -0,0 +1,109 @@
1
+ %w{function_call identifier string_literal}.each do |file|
2
+ require_relative file
3
+ end
4
+
5
module Bespoke
  class Export
    class Filter
      # Compiles one entry of a filter's :selects hash (alias => template)
      # into an aliased Sequel expression.
      #
      # A template is text with `<...>` placeholders. Each placeholder holds
      # one or more `|` separated alternatives (string literal, function call
      # or identifier); several alternatives are wrapped in COALESCE.
      class SelectTemplate
        attr_accessor :template_string, :select_alias

        # Matches one `<...>` placeholder.
        TEMPLATE_CHECK_REGEXP = /<[^>]+>/

        # Convenience wrapper around `new(...).compile`.
        def self.compile(select_alias, template_string, custom_functions={})
          new(select_alias, template_string, custom_functions).compile
        end

        # @param select_alias the alias given to the compiled expression
        # @param template_string the template, or a plain value without placeholders
        # @param custom_functions [Hash] custom functions handed to FunctionCall
        def initialize(select_alias, template_string, custom_functions={})
          @select_alias = select_alias
          @template_string = template_string
          @custom_functions = custom_functions
        end

        # Picks the output form in this order:
        # 1. no placeholder        -> the value itself, aliased
        # 2. exactly one placeholder and no literal text -> that column, aliased
        # 3. several placeholders sharing one distinct literal -> concat_ws
        #    with the literal passed as the last argument
        #    (NOTE(review): assumes the database provides a concat_ws taking
        #    the separator last - confirm)
        # 4. anything else         -> literals and columns chained with `+`
        def compile
          return Sequel.as(template_string, select_alias) if simple_string_template?
          return column_definitions.first.as(select_alias) if single_column_template?
          if joined_array_template?
            return Sequel.function(:concat_ws, *column_definitions, array_separator).as(select_alias)
          end

          concatenated_literals_and_columns
        end

        private

        def array_separator
          unique_string_literal_parts.first
        end

        def column_definitions
          @column_definitions ||= extract_columns_from_template
        end

        def complex_template?
          !TEMPLATE_CHECK_REGEXP.match(template_string).nil?
        end

        # Interleaves literal text and columns in template order and chains
        # them with `+`. Empty literals and the nil padding added by zip are
        # dropped; a leading literal is wrapped with Sequel.expr so that `+`
        # is sent to a Sequel expression rather than to a String.
        def concatenated_literals_and_columns
          pieces = split_template_parts.zip(column_definitions).flatten.reject { |part| part == '' || part.nil? }

          head = pieces.shift
          head = Sequel.expr(head) if head.is_a?(String)

          pieces.inject(head) { |expression, part| expression + part }.as(select_alias)
        end

        # One expression per placeholder: the single alternative itself, or
        # COALESCE over all alternatives.
        def extract_columns_from_template
          template_string.scan(TEMPLATE_CHECK_REGEXP).map do |placeholder|
            alternatives = extract_identifiers(placeholder[1..-2].strip)
            alternatives.length == 1 ? alternatives[0] : Sequel.function(:COALESCE, *alternatives)
          end
        end

        # Splits a placeholder body on `|` and converts each alternative,
        # trying string literal, then function call, then identifier.
        def extract_identifiers(template_part)
          template_part.split(/\s*\|\s*/).map do |string|
            if StringLiteral.match?(string)
              StringLiteral.extract_from(string)
            elsif FunctionCall.match?(string)
              FunctionCall.extract_from(string, @custom_functions)
            else
              Identifier.extract_from(string)
            end
          end
        end

        def joined_array_template?
          single_unique_string_literal_part? && multiple_column_definitions?
        end

        def multiple_column_definitions?
          column_definitions.size > 1
        end

        def simple_string_template?
          !complex_template?
        end

        def single_column_template?
          column_definitions.size == 1 && template_string_literals.empty?
        end

        def single_unique_string_literal_part?
          unique_string_literal_parts.size == 1
        end

        # Literal text around the placeholders. The -1 limit keeps trailing
        # empty strings, so there is always one more part than there are
        # placeholders; without it zip would silently drop columns that
        # follow the last literal (e.g. `x<a>y<b><c>` or `<a><b>`).
        def split_template_parts
          @split_template_parts ||= template_string.split(TEMPLATE_CHECK_REGEXP, -1)
        end

        def template_string_literals
          @template_string_literals ||= split_template_parts.reject { |part| part == "" }
        end

        def unique_string_literal_parts
          @unique_string_literal_parts ||= template_string_literals.uniq
        end
      end
    end
  end
end