bespoke 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/bespoke.gemspec +3 -2
- data/lib/bespoke.rb +14 -0
- data/lib/bespoke/dsl.rb +2 -3
- data/lib/bespoke/export.rb +7 -0
- data/lib/bespoke/export/filter.rb +154 -0
- data/lib/bespoke/export/filter/function_call.rb +61 -0
- data/lib/bespoke/export/filter/identifier.rb +66 -0
- data/lib/bespoke/export/filter/select_template.rb +109 -0
- data/lib/bespoke/export/filter/string_literal.rb +16 -0
- data/lib/bespoke/join.rb +15 -8
- data/lib/bespoke/mustache.rb +17 -0
- data/lib/bespoke/projection.rb +31 -17
- data/lib/bespoke/template.rb +2 -2
- data/lib/bespoke/typed_field.rb +30 -0
- data/lib/bespoke/version.rb +1 -1
- data/spec/fixtures/sif.student_personal.xslt +1 -1
- data/spec/fixtures/sif.xslt.mustache +1 -1
- data/spec/lib/bespoke/export/filter/function_call_spec.rb +72 -0
- data/spec/lib/bespoke/export/filter/identifier_spec.rb +62 -0
- data/spec/lib/bespoke/export/filter/select_template_spec.rb +102 -0
- data/spec/lib/bespoke/export/filter_spec.rb +102 -0
- data/spec/lib/bespoke/export_spec.rb +6 -0
- data/spec/lib/bespoke/join_spec.rb +59 -0
- data/spec/lib/bespoke/projection_spec.rb +94 -0
- data/spec/{template_spec.rb → lib/bespoke/template_spec.rb} +3 -3
- data/spec/lib/bespoke/typed_field_spec.rb +101 -0
- data/spec/{xsltproc_spec.rb → lib/bespoke/xsltproc_spec.rb} +1 -1
- data/spec/spec_helper.rb +16 -3
- data/spec/support/fixture_helpers.rb +24 -0
- metadata +52 -12
- data/spec/join_spec.rb +0 -44
data/bespoke.gemspec
CHANGED
@@ -19,11 +19,12 @@ Gem::Specification.new do |gem|
|
|
19
19
|
|
20
20
|
gem.add_development_dependency "bundler", ">= 1.0.0"
|
21
21
|
gem.add_development_dependency "rspec", "~> 2.6"
|
22
|
-
gem.add_development_dependency
|
22
|
+
gem.add_development_dependency 'guard'
|
23
|
+
gem.add_development_dependency 'guard-rspec'
|
23
24
|
|
24
25
|
gem.add_dependency 'rake'
|
25
|
-
gem.add_dependency 'docile'
|
26
26
|
gem.add_dependency 'mustache'
|
27
27
|
gem.add_dependency 'sqlite3'
|
28
28
|
gem.add_dependency 'sequel'
|
29
|
+
gem.add_dependency 'activesupport'
|
29
30
|
end
|
data/lib/bespoke.rb
CHANGED
@@ -1,6 +1,20 @@
|
|
1
1
|
require "bespoke/version"
|
2
2
|
|
3
|
+
module Bespoke
  # Base error class for the gem.
  #
  # On Ruby versions whose exceptions do not already respond to +cause+,
  # this class records the exception that was being handled (+$!+) when the
  # error was created, and exposes it through a +cause+ reader. On Rubies
  # where +cause+ is already defined nothing is added here.
  class Error < StandardError
    unless method_defined?(:cause)
      attr_reader :cause

      # @param msg [String, nil] error message; when nil the superclass
      #   default message is used, matching a bare `raise Bespoke::Error`
      # @param cause [Exception, nil] the exception that led to this one,
      #   defaults to the exception currently being handled (+$!+)
      def initialize(msg = nil, cause = $!)
        super(msg)
        @cause = cause
      end
    end
  end
end
|
15
|
+
|
3
16
|
require "bespoke/template"
|
4
17
|
require "bespoke/xsltproc"
|
5
18
|
require "bespoke/projection"
|
6
19
|
require "bespoke/join"
|
20
|
+
require 'bespoke/export'
|
data/lib/bespoke/dsl.rb
CHANGED
@@ -0,0 +1,154 @@
|
|
1
|
+
require_relative 'filter/select_template'
|
2
|
+
|
3
|
+
module Bespoke
  class Export
    # Applies a declarative template (selects, joins, where) to a dataset
    # and returns the resulting dataset.
    class Filter
      attr_reader :templates, :data_set

      # Constructs a new filter
      #
      # @param data_set [Sequel::Dataset] a Sequel::Dataset, this is generally
      #   obtained using #[] on an instance of a Sequel::Database but can also
      #   be the output of another filter's `#apply` method.
      #
      # @param templates [Hash{Symbol => String, Array, Hash{String, Symbol => String, Symbol}}]
      #   A hash of hashes specifying columns/aliases for select clause output,
      #   joins to perform (only performs left joins right now, this needs some
      #   work) and finally a where clause to filter the results. The selects,
      #   joins and where keys MUST be symbols, the nested hashes (when used)
      #   MAY have either String or Symbol keys.
      #
      #   An optional :functions key is also read; its value is handed to
      #   SelectTemplate.compile as the custom functions for every select
      #   (an empty hash is used when the key is absent).
      #
      # :selects key ==
      #   The selects key is most useful as a hash, the keys will get used as aliases
      #   for the specified template strings or symbol column definitions.
      #
      #   The following examples cover most use cases:
      #   ```
      #   # basic column specifications
      #   { first_name: :students__first_name } # => `students`.`first_name` AS 'first_name'
      #   { 'first_name' => '<students.first_name>' } # => `students`.`first_name` AS 'first_name'
      #   { 'first_name' => :first_name } # => `first_name` AS 'first_name'
      #
      #   # generate arbitrary output strings:
      #   { 'user_id' => 'student_<students.id>' } # => ('student_' || `students`.`id`) AS 'user_id'
      #
      #   # use periods in table/column names by quoting
      #   { 'first_name' => '<students."name.first_name">' } # => `students`.`name.first_name` AS 'first_name'
      #
      #   # use backticks as quotes
      #   { 'first_name' => '<students.`name.first_name`>' } # => `students`.`name.first_name` AS 'first_name'
      #
      #   # coalesce values using a pipe `|` and supply a default value using single quotes
      #   { 'course_name' => "<staff.admin_username | staff.teacher_username | 'default_value'>" }
      #   # => COALESCE(`staff`.`admin_username`, `staff`.`teacher_username`, 'default_value') AS 'course_name'
      #
      #   # use SQL functions (N.B. this currently does no manipulation on arguments so they remain strings)
      #   { 'course_length' => "<length('foobar')>" } # => LENGTH('foobar') AS 'course_length'
      #
      #   # delimited arrays are specially handled using concat_ws (which rejects NULLS before concatenating the results):
      #   { 'course_name' => '<courses.name>-<terms.abbreviation>-<staff."name.last_name">' }
      #   # => concat_ws(`courses`.`name`, `terms`.`abbreviation`, `staff`.`name.last_name`, '-') AS 'course_name'
      #
      #   # in delimited arrays the delimiter can be any arbitrary sequence:
      #   { 'course_name' => '<courses.name>=+=<terms.abbreviation>=+=<staff."name.last_name">' }
      #   # => concat_ws(`courses`.`name`, `terms`.`abbreviation`, `staff`.`name.last_name`, '=+=') AS 'course_name'
      #   ```
      #
      # :joins key ==
      #   The joins key is somewhat less flexible in what it will accept and
      #   generate but it still meets current needs.
      #
      #   ```
      #   # in hash specifications symbols and strings are interchangeable
      #   # assuming we're selecting from a table named `foos`
      #   { bars: [:id, :foo_id] } # => LEFT JOIN `bars` ON `foos`.`id` = `bars`.`foo_id`
      #   [:bars, :quxes] # => LEFT JOIN `bars` LEFT JOIN `quxes`
      #   ```
      #
      # :where key ==
      #   This is even more limited than the joins key and I don't anticipate
      #   it being extended because for these transforms there isn't much
      #   filtering to be done.
      #   This key will only accept a string that gets inserted directly into
      #   the SQL, after extensive testing this doesn't appear to present any
      #   SQLi potential since only the first statement gets executed and no
      #   additional information can be pulled into the query at this point
      #   due to this being the last segment we support. A user can still,
      #   however, cause the query to be invalid or reduce the amount of data
      #   returned.
      def initialize(data_set, templates)
        @data_set = data_set
        # The first FROM entry is used as the left-hand table of hash joins.
        @set_name = data_set.opts[:from].first
        @templates = templates
      end

      # Applies the selects, joins and where clause to the dataset, in that
      # order, and returns a new Dataset which can be interacted with using
      # Enumerable methods as well as any other methods on Sequel::Dataset.
      # Keys that are missing from the templates are skipped.
      #
      # @raise [RuntimeError] when a template key holds an unsupported type
      def apply
        ast = apply_selects(data_set, templates[:selects], templates[:functions])
        ast = apply_joins(ast, templates[:joins])
        apply_where(ast, templates[:where])
      end

      # #sql gives access to the SQL generated by applying the filters to the
      # dataset, this is mostly for diagnostics and testing since other code
      # can interact with the dataset returned from #apply using Enumerable
      # methods
      def sql
        apply.sql
      end

      private

      # Adds the requested joins to +ast+.
      #
      # * String - handed to +join+ unchanged
      # * Array  - each entry is left-joined in order
      # * Hash   - each `table => [left_column, right_column]` pair becomes a
      #   left join on `<from table>.left_column = table.right_column`
      #
      # TODO: figure out how to allow specification of what type of join we're doing
      # as well as being able to join tables to ones we've already joined, maybe
      # we can do something like:
      #   {table_three: ['<table_two.id>', '<table_three.two_id>', 'inner']}
      # with the last part being optional with the default being left
      def apply_joins(ast, joins)
        return ast unless joins
        case joins
        when String
          ast.join(joins)
        when Array
          # Block parameters are named distinctly so they do not shadow +ast+.
          joins.inject(ast) { |joined, table| joined.left_join(table) }
        when Hash
          joins.inject(ast) { |joined, (joined_table, columns)|
            joined.left_join(joined_table, Sequel.qualify(@set_name, columns[0]) => Sequel.qualify(joined_table, columns[1]))
          }
        else
          raise "Unsupported type (#{ joins.class }) in joins key for filter template"
        end
      end

      # Adds the select clause to +ast+. A String is handed to +select+
      # unchanged; a Hash of `alias => template` pairs is compiled entry by
      # entry through SelectTemplate.compile using +functions+ (or an empty
      # hash) as the custom functions.
      def apply_selects(ast, selects, functions)
        return ast unless selects
        case selects
        when String
          ast.select(selects)
        when Hash
          compiled_selects = selects.map do |(select_alias, template_string)|
            SelectTemplate.compile(select_alias, template_string, functions || {})
          end
          ast.select(*compiled_selects)
        else
          raise "Unsupported type (#{ selects.class }) in selects key for filter template"
        end
      end

      # Adds the where clause to +ast+; only a String is accepted and it is
      # handed to +where+ unchanged.
      #
      # NOTE(review): the string reaches the dataset as raw SQL. Newer Sequel
      # releases appear to reject plain String conditions unless they are
      # wrapped with Sequel.lit - confirm against the Sequel version in use.
      def apply_where(ast, where)
        return ast unless where
        case where
        when String
          ast.where(where)
        else
          raise "Unsupported type (#{ where.class }) in where key for filter template"
        end
      end
    end
  end
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
require 'active_support/core_ext/hash'
|
2
|
+
|
3
|
+
module Bespoke
  class Export
    class Filter
      # Parses a `name(arg, arg, ...)` string and turns it into either a
      # Sequel function expression or, for registered custom functions, a
      # literal SQL fragment produced by the registered callable.
      class FunctionCall
        # Whole-string match: everything before the first "(" is the function
        # name, everything up to the closing ")" is the raw argument list.
        EXTRACTION_REGEXP = /\A(?<function>[^\(]+)\((?<args>[^\)]*)\)\z/

        # Custom functions that are always available. Each value is called
        # with the array of (string) arguments and returns SQL text.
        DEFAULT_CUSTOM_FUNCTIONS = {
          'random_str' => ->(args) { "LOWER(HEX(RANDOMBLOB(#{ args[0].to_i })))" },
        }.with_indifferent_access

        attr_reader :custom_functions

        # @param string [String] candidate text
        # @return [Boolean] true when the stripped text looks like a function call
        def self.match?(string)
          !!EXTRACTION_REGEXP.match(string.strip)
        end

        # Convenience wrapper around `new(...).extract`.
        def self.extract_from(string, custom_functions={})
          new(string, custom_functions).extract
        end

        # @param string [String] the function call text, e.g. "length('foo')"
        # @param custom_functions [Hash, nil] extra custom functions, merged
        #   over DEFAULT_CUSTOM_FUNCTIONS; nil is treated as "no extras"
        def initialize(string, custom_functions={})
          @string = string.strip
          @custom_functions = DEFAULT_CUSTOM_FUNCTIONS.merge(custom_functions || {})
        end

        # @return the literal SQL of a custom function when the name is
        #   registered, otherwise a Sequel function call with the upcased
        #   name and the raw string arguments
        def extract
          if custom_function?
            Sequel.lit(compile_custom_function)
          else
            Sequel.function(function.upcase, *arguments)
          end
        end

        # @return [String] the function name; surrounding whitespace is
        #   removed so that "random_str (8)" still resolves the custom function
        def function
          call_parts[:function].strip
        end

        # TODO: Make this type cast integers, booleans and possibly identifiers
        #
        # @return [Array<String>] the comma separated arguments, each stripped
        def arguments
          @arguments ||= call_parts[:args].split(',').map(&:strip)
        end

        private

        # Invokes the registered callable with the parsed arguments.
        def compile_custom_function
          @custom_functions[function.downcase].call(arguments)
        end

        # True when the (case-insensitive) function name is registered.
        def custom_function?
          @custom_functions.key?(function.downcase)
        end

        # Memoized MatchData for the call text (nil when it does not match).
        def call_parts
          @call_parts ||= EXTRACTION_REGEXP.match(@string)
        end
      end
    end
  end
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'strscan'
|
2
|
+
|
3
|
+
module Bespoke
  class Export
    class Filter
      # Parses a `column` or `table.column` reference, where any part may be
      # wrapped in double quotes or backticks so that it can contain periods,
      # and converts it into a Sequel identifier.
      class Identifier
        # Convenience wrapper around `new(value).sequel_identifier`.
        def self.extract_from(value)
          new(value).sequel_identifier
        end

        # @param unparsed_value [String] the raw reference; surrounding
        #   whitespace is ignored
        def initialize(unparsed_value)
          @unparsed_value = unparsed_value.strip.freeze
        end

        # @return a qualified Sequel identifier when a table part is present,
        #   otherwise a bare Sequel identifier for the column
        def sequel_identifier
          table.nil? ? Sequel.identifier(column) : Sequel.qualify(table, column)
        end

        # @return [String, nil] the last dot separated part
        def column
          parts[:column]
        end

        # @return [String, nil] the first dot separated part, nil when the
        #   reference has only one part
        def table
          parts[:table]
        end

        # A part wrapped in matching double quotes or backticks; the wrapped
        # value may not contain whitespace.
        QUOTED_PART_EXTRACTION_REGEXP = /(?<quote>["`])(?<value>\S+?)\k<quote>/

        # @return [Hash{Symbol => String, nil}] memoized +:table+ / +:column+ pair
        def parts
          return @parts if @parts
          if quoted_identifier?
            parts_array = extract_parts_from_quoted_identifier
          else
            parts_array = @unparsed_value.split('.')
          end
          @parts = {
            table: (parts_array.length == 1 ? nil : parts_array.first),
            column: parts_array.last,
          }
        end

        private

        # True when the value contains at least one quoted part.
        def quoted_identifier?
          !!QUOTED_PART_EXTRACTION_REGEXP.match(@unparsed_value)
        end

        # Walks the value left to right, collecting quoted parts (without
        # their quotes) and unquoted dot separated parts in order.
        #
        # Tokenising directly avoids substituting numeric placeholders into
        # the string, which confused purely numeric unquoted parts (e.g. the
        # table in `2014."name.first"`) with placeholder indexes.
        def extract_parts_from_quoted_identifier
          scanner = StringScanner.new(@unparsed_value)
          collected = []

          until scanner.eos?
            if scanner.scan(QUOTED_PART_EXTRACTION_REGEXP)
              collected << scanner[2] # group 2 is the unquoted value
            elsif scanner.scan(/[^.]+/)
              collected << scanner.matched
            else
              scanner.getch # skip the "." separator
            end
          end

          collected
        end
      end
    end
  end
end
|
@@ -0,0 +1,109 @@
|
|
1
|
+
%w{function_call identifier string_literal}.each do |file|
|
2
|
+
require_relative file
|
3
|
+
end
|
4
|
+
|
5
|
+
module Bespoke
  class Export
    class Filter
      # Compiles one select template (an alias plus a template string) into
      # an aliased Sequel expression.
      #
      # A template is made of literal text and `<...>` column sections. Four
      # shapes are recognised, checked in this order:
      #
      # 1. no `<...>` section at all - the template itself is aliased
      # 2. exactly one section and no literal text - that column is aliased
      # 3. several sections separated by one repeated literal, with nothing
      #    before the first or after the last section - a `concat_ws` call
      #    whose last argument is the separator
      # 4. anything else - literals and columns are chained with `+` in
      #    template order
      class SelectTemplate
        attr_accessor :template_string, :select_alias

        # Matches one `<...>` column section.
        TEMPLATE_CHECK_REGEXP = /<[^>]+>/

        # Convenience wrapper around `new(...).compile`.
        def self.compile(select_alias, template_string, custom_functions={})
          new(select_alias, template_string, custom_functions).compile
        end

        # @param select_alias [String, Symbol] alias for the compiled expression
        # @param template_string [String, Symbol] the template to compile
        # @param custom_functions [Hash] forwarded to FunctionCall.extract_from
        def initialize(select_alias, template_string, custom_functions={})
          @select_alias = select_alias
          @template_string = template_string
          @custom_functions = custom_functions
        end

        # @return the aliased expression for the template (see class comment
        #   for the recognised shapes)
        def compile
          return Sequel.as(template_string, select_alias) if simple_string_template?
          return column_definitions.first.as(select_alias) if single_column_template?
          return Sequel.function(:concat_ws, *column_definitions, array_separator).as(select_alias) if joined_array_template?

          concatenated_literals_and_columns
        end

        private

        # The literal used between the columns of a joined array template.
        def array_separator
          unique_string_literal_parts.first
        end

        # Memoized expressions for every `<...>` section, in template order.
        def column_definitions
          @column_definitions ||= extract_columns_from_template
        end

        # True when the template contains at least one `<...>` section.
        def complex_template?
          !!TEMPLATE_CHECK_REGEXP.match(template_string)
        end

        # Interleaves literals and columns in template order and chains them
        # with `+`. split_template_parts always holds one more entry than
        # there are columns, so zip pairs every column with the literal that
        # precedes it; empty literals and the final nil are discarded.
        def concatenated_literals_and_columns
          joined_parts = split_template_parts.zip(column_definitions).flatten.reject { |part| part == '' || part.nil? }

          # A leading literal must be wrapped so that `+` builds an expression.
          first_part = (joined_parts.first.is_a?(String) ? Sequel.expr(joined_parts.shift) : joined_parts.shift)

          joined_parts.inject(first_part) { |expression, part|
            expression + part
          }.as(select_alias)
        end

        # Builds one expression per `<...>` section; several `|` separated
        # alternatives inside a section are wrapped in COALESCE.
        def extract_columns_from_template
          template_string.scan(TEMPLATE_CHECK_REGEXP).map { |template_part|
            unbracketed_template = template_part[1..-2].strip
            identifiers = extract_identifiers(unbracketed_template)
            (identifiers.length == 1 ? identifiers[0] : Sequel.function(:COALESCE, *identifiers))
          }
        end

        # Converts each `|` separated alternative into a string literal,
        # function call or identifier, tried in that order.
        def extract_identifiers(template_part)
          template_part.split(/\s*\|\s*/).map { |string|
            if StringLiteral.match?(string)
              StringLiteral.extract_from(string)
            elsif FunctionCall.match?(string)
              FunctionCall.extract_from(string, @custom_functions)
            else
              Identifier.extract_from(string)
            end
          }
        end

        # True only for `<a>SEP<b>SEP<c>` style templates: more than one
        # column, nothing before the first or after the last column, and the
        # same non-empty literal between every pair of columns. Leading or
        # trailing literals are not separators and must be concatenated.
        def joined_array_template?
          return false unless multiple_column_definitions?

          leading, *separators, trailing = split_template_parts
          return false unless leading == '' && trailing == ''

          !separators.include?('') && separators.uniq.size == 1
        end

        def multiple_column_definitions?
          column_definitions.size > 1
        end

        def simple_string_template?
          !complex_template?
        end

        def single_column_template?
          column_definitions.size == 1 && template_string_literals.empty?
        end

        def single_unique_string_literal_part?
          unique_string_literal_parts.size == 1
        end

        # The literal text around the `<...>` sections. The -1 limit keeps
        # trailing empty strings, which String#split otherwise drops; without
        # it columns at the end of the template would have no slot.
        def split_template_parts
          @split_template_parts ||= template_string.split(TEMPLATE_CHECK_REGEXP, -1)
        end

        # The non-empty literal parts of the template.
        def template_string_literals
          @template_string_literals ||= split_template_parts.reject { |part| part == "" }
        end

        def unique_string_literal_parts
          @unique_string_literal_parts ||= template_string_literals.uniq
        end
      end
    end
  end
end
|