bespoke 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bespoke.gemspec +3 -2
- data/lib/bespoke.rb +14 -0
- data/lib/bespoke/dsl.rb +2 -3
- data/lib/bespoke/export.rb +7 -0
- data/lib/bespoke/export/filter.rb +154 -0
- data/lib/bespoke/export/filter/function_call.rb +61 -0
- data/lib/bespoke/export/filter/identifier.rb +66 -0
- data/lib/bespoke/export/filter/select_template.rb +109 -0
- data/lib/bespoke/export/filter/string_literal.rb +16 -0
- data/lib/bespoke/join.rb +15 -8
- data/lib/bespoke/mustache.rb +17 -0
- data/lib/bespoke/projection.rb +31 -17
- data/lib/bespoke/template.rb +2 -2
- data/lib/bespoke/typed_field.rb +30 -0
- data/lib/bespoke/version.rb +1 -1
- data/spec/fixtures/sif.student_personal.xslt +1 -1
- data/spec/fixtures/sif.xslt.mustache +1 -1
- data/spec/lib/bespoke/export/filter/function_call_spec.rb +72 -0
- data/spec/lib/bespoke/export/filter/identifier_spec.rb +62 -0
- data/spec/lib/bespoke/export/filter/select_template_spec.rb +102 -0
- data/spec/lib/bespoke/export/filter_spec.rb +102 -0
- data/spec/lib/bespoke/export_spec.rb +6 -0
- data/spec/lib/bespoke/join_spec.rb +59 -0
- data/spec/lib/bespoke/projection_spec.rb +94 -0
- data/spec/{template_spec.rb → lib/bespoke/template_spec.rb} +3 -3
- data/spec/lib/bespoke/typed_field_spec.rb +101 -0
- data/spec/{xsltproc_spec.rb → lib/bespoke/xsltproc_spec.rb} +1 -1
- data/spec/spec_helper.rb +16 -3
- data/spec/support/fixture_helpers.rb +24 -0
- metadata +52 -12
- data/spec/join_spec.rb +0 -44
data/bespoke.gemspec
CHANGED
@@ -19,11 +19,12 @@ Gem::Specification.new do |gem|
|
|
19
19
|
|
20
20
|
gem.add_development_dependency "bundler", ">= 1.0.0"
|
21
21
|
gem.add_development_dependency "rspec", "~> 2.6"
|
22
|
-
gem.add_development_dependency
|
22
|
+
gem.add_development_dependency 'guard'
|
23
|
+
gem.add_development_dependency 'guard-rspec'
|
23
24
|
|
24
25
|
gem.add_dependency 'rake'
|
25
|
-
gem.add_dependency 'docile'
|
26
26
|
gem.add_dependency 'mustache'
|
27
27
|
gem.add_dependency 'sqlite3'
|
28
28
|
gem.add_dependency 'sequel'
|
29
|
+
gem.add_dependency 'activesupport'
|
29
30
|
end
|
data/lib/bespoke.rb
CHANGED
@@ -1,6 +1,20 @@
|
|
1
1
|
require "bespoke/version"
|
2
2
|
|
3
|
+
module Bespoke
  # Base error class for the library.
  #
  # Exposes a +cause+ reader holding the exception that was being handled
  # when this error was created, so the underlying failure stays reachable.
  class Error < StandardError
    # Only install the fallback when the running Ruby does not already give
    # exceptions a +cause+ method; otherwise the built-in one is left alone.
    unless method_defined?(:cause)
      # @return [Exception, nil] the exception captured at construction time
      attr_reader :cause

      # @param msg [String, nil] the error message; optional so that a bare
      #   `raise Bespoke::Error` works the same with or without the fallback
      # @param cause [Exception, nil] the underlying exception, defaulting to
      #   the exception currently being handled (`$!`)
      def initialize(msg = nil, cause = $!)
        super(msg)
        @cause = cause
      end
    end
  end
end
|
15
|
+
|
3
16
|
require "bespoke/template"
|
4
17
|
require "bespoke/xsltproc"
|
5
18
|
require "bespoke/projection"
|
6
19
|
require "bespoke/join"
|
20
|
+
require 'bespoke/export'
|
data/lib/bespoke/dsl.rb
CHANGED
@@ -0,0 +1,154 @@
|
|
1
|
+
require_relative 'filter/select_template'
|
2
|
+
|
3
|
+
module Bespoke
|
4
|
+
class Export
|
5
|
+
class Filter
  attr_reader :templates, :data_set

  # Constructs a new filter.
  #
  # @param data_set [Sequel::Dataset] the dataset to filter. This is
  #   generally obtained using #[] on an instance of Sequel::Database, but it
  #   can also be the output of another filter's `#apply` method.
  #
  # @param templates [Hash{Symbol => String, Array, Hash{String, Symbol => String, Symbol}}]
  #   A hash specifying columns/aliases for the select clause output, joins
  #   to perform and a where clause to filter the results. The :selects,
  #   :joins and :where keys MUST be symbols; the nested hashes (when used)
  #   MAY have either String or Symbol keys. An optional :functions entry is
  #   handed to the select templates as custom functions.
  #
  # == :selects key
  # The selects key is most useful as a hash: the keys are used as aliases
  # for the specified template strings or symbol column definitions.
  #
  # The following examples cover most use cases:
  # ```
  #   # basic column specifications
  #   { first_name: :students__first_name }       # => `students`.`first_name` AS 'first_name'
  #   { 'first_name' => '<students.first_name>' } # => `students`.`first_name` AS 'first_name'
  #   { 'first_name' => :first_name }             # => `first_name` AS 'first_name'
  #
  #   # generate arbitrary output strings:
  #   { 'user_id' => 'student_<students.id>' } # => ('student_' || `students`.`id`) AS 'user_id'
  #
  #   # use periods in table/column names by quoting
  #   { 'first_name' => '<students."name.first_name">' } # => `students`.`name.first_name` AS 'first_name'
  #
  #   # use backticks as quotes
  #   { 'first_name' => '<students.`name.first_name`>' } # => `students`.`name.first_name` AS 'first_name'
  #
  #   # coalesce values using a pipe `|` and supply a default value using single quotes
  #   { 'course_name' => "<staff.admin_username | staff.teacher_username | 'default_value'>" }
  #     # => COALESCE(`staff`.`admin_username`, `staff`.`teacher_username`, 'default_value') AS 'course_name'
  #
  #   # use SQL functions (N.B. arguments are currently not manipulated, so they remain strings)
  #   { 'course_length' => "<length('foobar')>" } # => LENGTH('foobar') AS 'course_length'
  #
  #   # delimited arrays are handled using concat_ws (which rejects NULLs before concatenating the results):
  #   { 'course_name' => '<courses.name>-<terms.abbreviation>-<staff."name.last_name">' }
  #     # => concat_ws(`courses`.`name`, `terms`.`abbreviation`, `staff`.`name.last_name`, '-') AS 'course_name'
  #
  #   # in delimited arrays the delimiter can be any arbitrary sequence:
  #   { 'course_name' => '<courses.name>=+=<terms.abbreviation>=+=<staff."name.last_name">' }
  #     # => concat_ws(`courses`.`name`, `terms`.`abbreviation`, `staff`.`name.last_name`, '=+=') AS 'course_name'
  # ```
  #
  # == :joins key
  # The joins key is less flexible in what it accepts and generates, but it
  # meets current needs. Array and Hash specifications produce left joins; a
  # String is passed to the dataset's `#join`.
  #
  # ```
  #   # in hash specifications symbols and strings are interchangeable
  #   # assuming we're selecting from a table named `foos`
  #   { bars: [:id, :foo_id] } # => LEFT JOIN `bars` ON `foos`.`id` = `bars`.`foo_id`
  #   [:bars, :quxes]          # => LEFT JOIN `bars` LEFT JOIN `quxes`
  # ```
  #
  # == :where key
  # This is even more limited than the joins key and is not expected to be
  # extended, because these transforms need little filtering.
  # Only a String is accepted and it is inserted directly into the SQL. The
  # author's testing did not find SQL injection potential (only the first
  # statement gets executed and, this being the last segment supported, no
  # additional information can be pulled into the query), but a caller can
  # still make the query invalid or reduce the amount of data returned.
  def initialize(data_set, templates)
    @templates = templates
    @data_set = data_set
    # The first FROM entry is the table that hash-style joins are qualified against.
    @set_name = data_set.opts[:from].first
  end

  # Applies the selects, joins and where clause to the dataset.
  #
  # @return [Sequel::Dataset] a new dataset which can be interacted with
  #   using Enumerable methods as well as any other Sequel::Dataset method
  def apply
    selected = apply_selects(data_set, templates[:selects], templates[:functions])
    joined = apply_joins(selected, templates[:joins])
    apply_where(joined, templates[:where])
  end

  # Gives access to the SQL generated by applying the filter to the
  # dataset. Mostly for diagnostics and testing, since other code can work
  # with the dataset returned from #apply directly.
  #
  # @return [String] the SQL of the filtered dataset
  def sql
    apply.sql
  end

  private

  # TODO: figure out how to allow specification of what type of join we're doing
  # as well as being able to join tables to ones we've already joined, maybe
  # we can do something like:
  #   {table_three: ['<table_two.id>', '<table_three.two_id>', 'inner']}
  # with the last part being optional with the default being left

  # Adds the requested joins to the dataset.
  #
  # @param ast [Sequel::Dataset] the dataset built so far
  # @param joins [String, Array, Hash, nil] the join specification
  # @return [Sequel::Dataset] the dataset unchanged when +joins+ is nil/false
  # @raise [RuntimeError] when +joins+ is of an unsupported type
  def apply_joins(ast, joins)
    return ast unless joins

    case joins
    when String
      ast.join(joins)
    when Array
      joins.reduce(ast) { |dataset, table| dataset.left_join(table) }
    when Hash
      joins.reduce(ast) do |dataset, (joined_table, columns)|
        # columns[0] lives on the base table, columns[1] on the joined table.
        base_column = Sequel.qualify(@set_name, columns[0])
        joined_column = Sequel.qualify(joined_table, columns[1])
        dataset.left_join(joined_table, base_column => joined_column)
      end
    else
      raise "Unsupported type (#{ joins.class }) in joins key for filter template"
    end
  end

  # Adds the select clause to the dataset.
  #
  # @param ast [Sequel::Dataset] the dataset built so far
  # @param selects [String, Hash, nil] the select specification
  # @param functions [Hash, nil] custom functions handed to the templates
  # @return [Sequel::Dataset] the dataset unchanged when +selects+ is nil/false
  # @raise [RuntimeError] when +selects+ is of an unsupported type
  def apply_selects(ast, selects, functions)
    return ast unless selects

    case selects
    when String
      ast.select(selects)
    when Hash
      custom_functions = functions || {}
      columns = selects.map do |select_alias, template_string|
        SelectTemplate.compile(select_alias, template_string, custom_functions)
      end
      ast.select(*columns)
    else
      raise "Unsupported type (#{ selects.class }) in selects key for filter template"
    end
  end

  # Adds the where clause to the dataset.
  #
  # @param ast [Sequel::Dataset] the dataset built so far
  # @param where [String, nil] the raw where clause
  # @return [Sequel::Dataset] the dataset unchanged when +where+ is nil/false
  # @raise [RuntimeError] when +where+ is not a String
  def apply_where(ast, where)
    return ast unless where

    case where
    when String
      ast.where(where)
    else
      raise "Unsupported type (#{ where.class }) in where key for filter template"
    end
  end
end
|
153
|
+
end
|
154
|
+
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
require 'active_support/core_ext/hash'
|
2
|
+
|
3
|
+
module Bespoke
|
4
|
+
class Export
|
5
|
+
class Filter
|
6
|
+
# Parses a template fragment of the form `name(arg, arg, ...)` and turns it
# into a Sequel expression.
class FunctionCall
  # Captures everything before the first `(` as the function name and
  # everything up to the closing `)` as the raw argument list.
  EXTRACTION_REGEXP = /\A(?<function>[^\(]+)\((?<args>[^\)]*)\)\z/

  # Functions compiled to literal SQL by a lambda instead of being passed
  # through as a plain SQL function call. Each lambda receives the array of
  # (string) arguments.
  DEFAULT_CUSTOM_FUNCTIONS = {
    'random_str' => ->(args) { "LOWER(HEX(RANDOMBLOB(#{ args[0].to_i })))" },
  }.with_indifferent_access

  attr_reader :custom_functions

  # @param string [String] a template fragment
  # @return [Boolean] whether the stripped fragment looks like a function call
  def self.match?(string)
    !EXTRACTION_REGEXP.match(string.strip).nil?
  end

  # Convenience wrapper: parse +string+ and return its Sequel expression.
  #
  # @param string [String] a template fragment
  # @param custom_functions [Hash] extra custom functions, merged over the defaults
  def self.extract_from(string, custom_functions={})
    new(string, custom_functions).extract
  end

  # @param string [String] a template fragment; surrounding whitespace is removed
  # @param custom_functions [Hash] extra custom functions, merged over the defaults
  def initialize(string, custom_functions={})
    @custom_functions = DEFAULT_CUSTOM_FUNCTIONS.merge(custom_functions)
    @string = string.strip
  end

  # Builds the Sequel expression: literal SQL for a custom function,
  # otherwise a function call with the upcased name and the raw arguments.
  def extract
    return Sequel.lit(compile_custom_function) if custom_function?

    Sequel.function(function.upcase, *arguments)
  end

  # @return [String] the function name exactly as written in the fragment
  def function
    call_parts[:function]
  end

  # TODO: Make this type cast integers, booleans and possibly identifiers
  #
  # @return [Array<String>] the comma separated arguments, each stripped
  def arguments
    @arguments ||= call_parts[:args].split(',').map { |argument| argument.strip }
  end

  private

  # Runs the registered lambda for this function with the parsed arguments.
  def compile_custom_function
    handler = @custom_functions[function.downcase]
    handler.call(arguments)
  end

  # Custom functions are looked up by their downcased name.
  def custom_function?
    @custom_functions.key?(function.downcase)
  end

  # Memoised match of the fragment against EXTRACTION_REGEXP.
  def call_parts
    @function_parts ||= EXTRACTION_REGEXP.match(@string)
  end
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'strscan'
|
2
|
+
|
3
|
+
module Bespoke
|
4
|
+
class Export
|
5
|
+
class Filter
|
6
|
+
# Parses a `table.column` or bare `column` reference from a template into its
# parts. Any dot-separated segment may be wrapped in double quotes or
# backticks so that it can itself contain periods.
class Identifier
  # Matches one quoted segment; the quoted value may not contain whitespace.
  QUOTED_PART_EXTRACTION_REGEXP = /(?<quote>["`])(?<value>\S+?)\k<quote>/

  # Convenience wrapper: parse +value+ and return its Sequel identifier.
  #
  # @param value [String] the unparsed identifier
  def self.extract_from(value)
    new(value).sequel_identifier
  end

  # @param unparsed_value [String] the identifier as written in the template;
  #   surrounding whitespace is removed
  def initialize(unparsed_value)
    @unparsed_value = unparsed_value.strip.freeze
  end

  # @return a qualified Sequel identifier when a table is present, otherwise
  #   a plain column identifier
  def sequel_identifier
    table.nil? ? Sequel.identifier(column) : Sequel.qualify(table, column)
  end

  # @return [String, nil] the column name (the last segment)
  def column
    parts[:column]
  end

  # @return [String, nil] the table name (the first segment), or nil when
  #   the identifier has a single segment
  def table
    parts[:table]
  end

  # @return [Hash{Symbol => String, nil}] memoised `:table` and `:column`
  def parts
    @parts ||= begin
      segments =
        if quoted_identifier?
          extract_parts_from_quoted_identifier
        else
          @unparsed_value.split('.')
        end

      {
        table: (segments.length == 1 ? nil : segments.first),
        column: segments.last,
      }
    end
  end

  private

  # True when a quoted segment appears anywhere in the identifier.
  def quoted_identifier?
    !QUOTED_PART_EXTRACTION_REGEXP.match(@unparsed_value).nil?
  end

  # Walks the identifier one dot-separated segment at a time. A segment that
  # starts with a quoted section yields the text between the quotes (periods
  # included); any other segment is taken verbatim up to the next period.
  #
  # Reading segments directly, rather than swapping quoted sections for
  # numeric placeholders, means an unquoted all-digit segment (for example
  # the `1` in `1."name.first"`) cannot be mistaken for a placeholder.
  #
  # @return [Array<String>] the segments in order
  def extract_parts_from_quoted_identifier
    scanner = StringScanner.new(@unparsed_value)
    segments = []

    until scanner.eos?
      if scanner.scan(QUOTED_PART_EXTRACTION_REGEXP)
        segments << scanner[2] # the value between the quotes
      else
        segments << scanner.scan(/[^.]*/)
      end
      scanner.skip(/\./) # step over the separator, if there is one
    end

    segments
  end
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
@@ -0,0 +1,109 @@
|
|
1
|
+
%w{function_call identifier string_literal}.each do |file|
|
2
|
+
require_relative file
|
3
|
+
end
|
4
|
+
|
5
|
+
module Bespoke
|
6
|
+
class Export
|
7
|
+
class Filter
|
8
|
+
# Compiles one select template (e.g. `'student_<students.id>'`) plus its
# alias into an aliased Sequel expression.
#
# Text inside `<...>` is a column definition (identifier, string literal,
# function call, or a `|` separated list that becomes COALESCE); text outside
# the brackets is literal output.
class SelectTemplate
  attr_accessor :template_string, :select_alias

  # Matches a single `<...>` column definition.
  TEMPLATE_CHECK_REGEXP = /<[^>]+>/

  # Convenience wrapper around `new(...).compile`.
  #
  # @param select_alias [String, Symbol] alias for the compiled expression
  # @param template_string [String, Symbol] the template to compile
  # @param custom_functions [Hash] custom functions passed on to FunctionCall
  def self.compile(select_alias, template_string, custom_functions={})
    new(select_alias, template_string, custom_functions).compile
  end

  # @param select_alias [String, Symbol] alias for the compiled expression
  # @param template_string [String, Symbol] the template to compile
  # @param custom_functions [Hash] custom functions passed on to FunctionCall
  def initialize(select_alias, template_string, custom_functions={})
    @select_alias = select_alias
    @template_string = template_string
    @custom_functions = custom_functions
  end

  # Chooses the compilation strategy:
  # * no `<...>` at all: the template itself, aliased
  # * exactly one column and no literal text: that column, aliased
  # * several columns separated by one repeated delimiter: `concat_ws`
  # * anything else: literals and columns concatenated in template order
  #
  # @return the aliased Sequel expression
  def compile
    return Sequel.as(template_string, select_alias) if simple_string_template?
    return column_definitions.first.as(select_alias) if single_column_template?
    return Sequel.function(:concat_ws, *column_definitions, array_separator).as(select_alias) if joined_array_template?

    concatenated_literals_and_columns
  end

  private

  # The delimiter of a joined-array template.
  def array_separator
    unique_string_literal_parts.first
  end

  # Memoised expressions for every `<...>` in the template, in order.
  def column_definitions
    @column_definitions ||= extract_columns_from_template
  end

  # True when the template contains at least one `<...>`.
  def complex_template?
    !TEMPLATE_CHECK_REGEXP.match(template_string).nil?
  end

  # Concatenates literals and columns in the order they appear.
  #
  # NOTE(review): when the template holds only adjacent columns and no
  # literal text (e.g. `<a><b>`), the chain starts from a column rather than
  # a string expression; confirm that `+` renders as concatenation there.
  def concatenated_literals_and_columns
    pieces = interleaved_literals_and_columns
    head = pieces.shift
    # A leading literal has to become an expression before `+` can chain on it.
    head = Sequel.expr(head) if head.is_a?(String)

    pieces.inject(head) { |expression, piece| expression + piece }.as(select_alias)
  end

  # Literal i precedes column i, so walking the split parts by index keeps
  # every column even when two columns sit next to each other or the
  # template ends with a column. Empty literals are skipped.
  #
  # @return [Array] a fresh array of literals (String) and column expressions
  def interleaved_literals_and_columns
    pieces = []
    split_template_parts.each_with_index do |literal, index|
      pieces << literal unless literal.empty?
      column = column_definitions[index]
      pieces << column if column
    end
    pieces
  end

  # Builds one expression per `<...>`; several `|` separated alternatives
  # inside a single pair of brackets are wrapped in COALESCE.
  def extract_columns_from_template
    template_string.scan(TEMPLATE_CHECK_REGEXP).map do |template_part|
      unbracketed_template = template_part[1..-2].strip
      identifiers = extract_identifiers(unbracketed_template)
      identifiers.length == 1 ? identifiers[0] : Sequel.function(:COALESCE, *identifiers)
    end
  end

  # Classifies each `|` separated alternative as a string literal, a
  # function call or an identifier (checked in that order).
  def extract_identifiers(template_part)
    template_part.split(/\s*\|\s*/).map do |string|
      if StringLiteral.match?(string)
        StringLiteral.extract_from(string)
      elsif FunctionCall.match?(string)
        FunctionCall.extract_from(string, @custom_functions)
      else
        Identifier.extract_from(string)
      end
    end
  end

  # A joined array is several columns with the same delimiter between every
  # neighbouring pair and no literal text before the first or after the last
  # column. Checking positions (not just the number of distinct literals)
  # keeps templates such as `prefix_<a><b>` or `<a>-<b>-` out of concat_ws,
  # where the literal would be misused as a delimiter or dropped.
  def joined_array_template?
    return false unless single_unique_string_literal_part? && multiple_column_definitions?

    leading, *separators, trailing = split_template_parts
    leading.empty? && trailing.empty? && separators.none? { |separator| separator.empty? }
  end

  def multiple_column_definitions?
    column_definitions.size > 1
  end

  def simple_string_template?
    !complex_template?
  end

  def single_column_template?
    column_definitions.size == 1 && template_string_literals.empty?
  end

  def single_unique_string_literal_part?
    unique_string_literal_parts.size == 1
  end

  # The literal text around the columns. The -1 limit keeps trailing empty
  # strings, so there is always exactly one more part than there are columns.
  def split_template_parts
    @split_template_parts ||= template_string.split(TEMPLATE_CHECK_REGEXP, -1)
  end

  # The non-empty literal parts, in template order.
  def template_string_literals
    @template_string_literals ||= split_template_parts.reject { |part| part == "" }
  end

  def unique_string_literal_parts
    @unique_string_literal_parts ||= template_string_literals.uniq
  end
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|