activefacts-compositions 1.9.18 → 1.9.19

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9264168bcd4294c87277a11fde73ba06c0e490e6
4
- data.tar.gz: 40f2da7b5f7a72066b13ed7bc2de790e3717af82
3
+ metadata.gz: 2dd043b17bcbdb5bf4f525fe24022ad95a32946e
4
+ data.tar.gz: 95fb5b1b17e005cc0bd90f6f9c527e6455557a87
5
5
  SHA512:
6
- metadata.gz: ecb7fb4562dbe1e8bef8924abc26a19c2aa4816eca5d09fd609b34a060acd11bef1d72642fe32ef631a5af2c1bd82bc968fbe8c48d386c0f409d41aba624c5bc
7
- data.tar.gz: 46e7e8d45dadc6154867d2ce6ccd605bbcd897ab86f063481b8680b0fec4284ccd529edbd38fa86cabadeaad1584e82090fd89fc3f3a430da6dce47d61950106
6
+ metadata.gz: 528cbe042208d8dd290ef5cd5e11ae390261f25c04c4e5558ef111f7fc18aab8a82ea1421ee2147c71d3659c077c93af632ba7372fc337b2797f2a6c69846b30
7
+ data.tar.gz: fdf3d9610a0751f91d8b2b8fb022918aa81c31b99a73a359100622cef6f87d04338428253172921bde29e48906b25622915ea89d20fa416e0d156e8fd1695fff
@@ -20,7 +20,7 @@ Gem::Specification.new do |spec|
20
20
  spec.require_paths = ["lib"]
21
21
 
22
22
  spec.add_development_dependency "bundler", ">= 1.11"
23
- spec.add_development_dependency "rake", "~> 10.0"
23
+ spec.add_development_dependency "rake", "> 10"
24
24
  spec.add_development_dependency "rspec", "~> 3.3"
25
25
 
26
26
  spec.add_runtime_dependency "activesupport", ">= 4.2.7"
@@ -630,13 +630,8 @@ module ActiveFacts
630
630
  if absorption.foreign_key
631
631
  trace :datavault, "Setting new source composite for #{absorption.foreign_key.inspect}"
632
632
  absorption.foreign_key.source_composite = link
633
- if absorption.foreign_key.all_foreign_key_field.single
634
- fk2_component = absorption.foreign_key.all_foreign_key_field.single.component
635
- else
636
- p absorption.foreign_key
637
- debugger
638
- absorption.foreign_key.retract
639
- end
633
+ debugger unless absorption.foreign_key.all_foreign_key_field.single
634
+ fk2_component = absorption.foreign_key.all_foreign_key_field.single.component
640
635
  end
641
636
  end
642
637
 
@@ -51,6 +51,16 @@ module ActiveFacts
51
51
  @option_surrogates && composite.mapping.object_type != @loadbatch_entity_type
52
52
  end
53
53
 
54
+ def inject_surrogates
55
+ assign_groups
56
+ super
57
+ end
58
+
59
+ def assign_groups
60
+ @composites.values.each{|composite| composite.composite_group = 'base' }
61
+ loadbatch_composite.composite_group = 'batch' if @option_audit == 'batch'
62
+ end
63
+
54
64
  def generate
55
65
  create_loadbatch if @option_audit == 'batch'
56
66
  super
@@ -1,5 +1,5 @@
1
1
  module ActiveFacts
2
2
  module Compositions
3
- VERSION = "1.9.18"
3
+ VERSION = "1.9.19"
4
4
  end
5
5
  end
@@ -8,7 +8,6 @@
8
8
  require 'activefacts/metamodel'
9
9
  require 'activefacts/compositions'
10
10
  require 'activefacts/generator'
11
- require 'byebug'
12
11
 
13
12
  module ActiveFacts
14
13
  module Generators #:nodoc:
@@ -50,7 +50,7 @@ module ActiveFacts
50
50
 
51
51
  def process_options options
52
52
  @value_width = (options.delete('value_width') || 32).to_i
53
- @phonetic_confidence = (options.delete('phonetic_confidence') || 70).to_i
53
+ @phonetic_confidence = (options.delete('phonetic_confidence') || 40).to_i
54
54
 
55
55
  super
56
56
  end
@@ -236,32 +236,55 @@ module ActiveFacts
236
236
  case sm
237
237
  when 'none' # Do not index this value
238
238
  nil
239
+
239
240
  when 'simple' # Disregard white-space only
240
241
  select(composite, truncate(col_expr, @value_width), 'simple', source_field, 1.0)
241
242
 
242
- when 'alpha' # Strip white space and punctuation, just use alphabetic characters
243
- select(composite, truncate(as_alpha(col_expr), @value_width), 'alpha', source_field, 0.9)
244
-
245
- when 'words' # Break the text into words and match each word like alpha
246
- nil # REVISIT: Implement this type
247
-
248
- # when 'phrases' # Words, but where adjacent sequences of words matter
249
- when 'typo' # Use trigram similarity to detect typographic errors
250
- # REVISIT: Implement this type properly
251
- select(composite, trigram(as_alpha(col_expr)), 'typo', source_field, 0.9)
243
+ when 'alpha', # Strip white space and punctuation, just use alphabetic characters
244
+ 'typo' # Use trigram similarity to detect typographic errors, over the same values
245
+ truncated = truncate(as_alpha(col_expr), @value_width)
246
+ select(
247
+ composite, truncated, sm, source_field,
248
+ "CASE WHEN #{truncated} = #{col_expr} THEN 1.0 ELSE 0.95 END" # Maybe exact match.
249
+ )
252
250
 
253
251
  when 'phonetic' # Use phonetic matching as well as trigrams
254
- phonetics(col_expr).map do |p|
255
- select(composite, p, 'phonetic', source_field, @phonetic_confidence/100.0)
256
- end
252
+ search_expr(composite, intrinsic_type, col_expr, ['typo'], source_field) <<
253
+ select(composite, phonetics(col_expr), 'phonetic', source_field, @phonetic_confidence/100.0, true)
254
+
255
+ when 'words' # Break the text into words and match each word like alpha
256
+ truncated = truncate(unnest(as_words(col_expr)), @value_width)
257
+ select(composite, truncated, sm, source_field, 0.90, true)
257
258
 
258
259
  when 'names' # Break the text into words and match each word like phonetic
259
- nil # REVISIT: Implement this type
260
+ truncated = truncate(unnest(as_words(col_expr, "''-")), @value_width) # N.B. ' is doubled for SQL
261
+ search_expr(composite, intrinsic_type, col_expr, ['words'], source_field) <<
262
+ phonetics(truncated).map do |phonetic|
263
+ select(composite, phonetic, 'names', source_field, @phonetic_confidence/100.0, true)
264
+ end
260
265
 
261
266
  when 'text' # Index a large text field using significant words and phrases
262
267
  nil # REVISIT: Implement this type
268
+
269
+ when 'number' # Cast to number and back to text to canonicalise the value;
270
+ # If the number doesn't match this regexp, we don't index it.
271
+ # This doesn't handle all valid Postgres numeric literals (e.g. 2.3e-4)
272
+ select(composite, col_expr, 'number', source_field, number_or_null(col_expr))
273
+
274
+ when 'phone' # Phone numbers; split, strip each to digits, take the last 8 of each
275
+ select(composite, phone_numbers(col_expr), 'phone', source_field, 1)
276
+
277
+ when 'email' # Use a regexp to find email addresses in this field
278
+ select(composite, truncate(email_addresses(col_expr), @value_width), 'email', source_field, 1)
279
+
280
+ when 'date' # Convert string to standard date format if it looks like a date, NULL otherwise
281
+ select(
282
+ composite, col_expr, 'date', source_field, 1,
283
+ %Q{CASE WHEN #{col_expr} ~ '^ *[0-9]+[.]?[0-9]*|[.][0-9]+) *$' THEN (#{col_expr}::numeric):text ELSE NULL END}
284
+ )
285
+
263
286
  else
264
- raise "Unknown search method #{sm}"
287
+ $stderrs.puts "Unknown search method #{sm}"
265
288
  end
266
289
  end
267
290
 
@@ -273,6 +296,7 @@ module ActiveFacts
273
296
  MM::DataType::TYPE_Decimal,
274
297
  MM::DataType::TYPE_Money
275
298
  # Produce a right-justified value
299
+ # REVISIT: This is a dumb thing to do.
276
300
  select(composite, lexical_decimal(col_expr, @value_width, value_type.scale), 'simple', source_field, 1)
277
301
 
278
302
  when MM::DataType::TYPE_Date
@@ -301,7 +325,7 @@ module ActiveFacts
301
325
  name.words.send(@column_case)*@column_joiner
302
326
  end
303
327
 
304
- def select composite, expression, processing, source_field, confidence = 1, where = []
328
+ def select composite, expression, processing, source_field, confidence = 1, distinct = false, where = []
305
329
  # These fields are in order of index precedence, to co-locate
306
330
  # comparable values regardless of source record type or column
307
331
  where << 'Value IS NOT NULL' if expression.to_s =~ /\bNULL\b/
@@ -313,17 +337,16 @@ module ActiveFacts
313
337
  source_table_name = stylise_column_name("SourceTable")
314
338
  source_field_name = stylise_column_name("SourceField")
315
339
  expression_text = expression.to_s
316
- expression_text = "ARRAY[#{expression_text}]" unless expression.is_array
317
340
  select = %Q{
318
- SELECT '#{processing}' AS #{processing_name},
341
+ SELECT#{distinct ? ' DISTINCT' : ''}
342
+ '#{processing}' AS #{processing_name},
319
343
  #{expression_text} AS #{value_name},
320
344
  #{load_batch_id_name},
321
- #{"%.2f" % confidence} AS #{confidence_name},
345
+ #{confidence} AS #{confidence_name},
322
346
  #{record_guid_name},
323
347
  '#{safe_table_name(composite)}' AS #{source_table_name},
324
348
  '#{source_field}' AS #{source_field_name}
325
- FROM #{safe_table_name(composite)}
326
- WHERE COALESCE(#{expression},'') <> ''}.
349
+ FROM #{safe_table_name(composite)}}.
327
350
  unindent
328
351
 
329
352
  if where.empty?
@@ -16,6 +16,7 @@ module ActiveFacts
16
16
  HEADER = "# Auto-generated from CQL, edits will be lost"
17
17
  def self.options
18
18
  ({
19
+ keep: ['Boolean', "Keep stale model files"],
19
20
  output: [String, "Overwrite model files into this output directory"],
20
21
  concern: [String, "Namespace for the concerns"],
21
22
  validation: ['Boolean', "Disable generation of validations"],
@@ -25,6 +26,7 @@ module ActiveFacts
25
26
  def initialize composition, options = {}
26
27
  @composition = composition
27
28
  @options = options
29
+ @option_keep = options.delete("keep")
28
30
  @option_output = options.delete("output")
29
31
  @option_concern = options.delete("concern")
30
32
  @option_validations = options.include?('validations') ? options.delete("validations") : true
@@ -35,7 +37,7 @@ module ActiveFacts
35
37
  end
36
38
 
37
39
  def generate
38
- list_extant_files if @option_output
40
+ list_extant_files if @option_output && !@option_keep
39
41
 
40
42
  @ok = true
41
43
  models =
@@ -46,7 +48,7 @@ module ActiveFacts
46
48
  compact*"\n"
47
49
 
48
50
  warn "\# #{@composition.name} generated with errors" unless @ok
49
- delete_old_generated_files if @option_output
51
+ delete_old_generated_files if @option_output && !@option_keep
50
52
 
51
53
  models
52
54
  end
@@ -158,6 +160,7 @@ module ActiveFacts
158
160
  composite.all_foreign_key_as_source_composite.
159
161
  sort_by{ |fk| fk.all_foreign_key_field.map(&:component).flat_map(&:path).map(&:rank_key) }.
160
162
  flat_map do |fk|
163
+ next nil if fk.all_foreign_key_field.size > 1
161
164
  association_name = fk.rails.from_association_name
162
165
 
163
166
  if association_name != fk.composite.rails.singular_name
@@ -171,9 +174,14 @@ module ActiveFacts
171
174
  foreign_key = ''
172
175
  end
173
176
 
177
+ single_fk_field = fk.all_foreign_key_field.single.component
178
+ if !single_fk_field.path_mandatory
179
+ optional = ", :optional => true"
180
+ end
181
+
174
182
  [
175
183
  fk.mapping ? " \# #{fk.mapping.comment}" : nil,
176
- " belongs_to :#{association_name}#{class_name}#{foreign_key}",
184
+ " belongs_to :#{association_name}#{class_name}#{foreign_key}#{optional}",
177
185
  fk.mapping ? '' : nil,
178
186
  ]
179
187
  end.compact
@@ -184,9 +192,10 @@ module ActiveFacts
184
192
  composite.all_foreign_key_as_target_composite.
185
193
  sort_by{ |fk| fk.all_foreign_key_field.map(&:component).flat_map(&:path).map(&:rank_key) }.
186
194
  flat_map do |fk|
195
+ next nil if fk.all_foreign_key_field.size > 1
187
196
 
188
197
  if fk.all_foreign_key_field.size > 1
189
- raise "Can't emit Rails associations for multi-part foreign key with #{fk.references.inspect}. Did you mean to use --surrogate?"
198
+ raise "Can't emit Rails associations for multi-part foreign key with #{fk.all_foreign_key_field.inspect}. Did you mean to use --surrogate?"
190
199
  end
191
200
 
192
201
  association_type, association_name = *fk.rails.to_association
@@ -205,15 +214,22 @@ module ActiveFacts
205
214
  fk.source_composite.primary_index.all_index_field.map(&:component).flat_map do |ic|
206
215
  next nil if ic.is_a?(MM::Indicator) # or use rails.plural_name(ic.references[0].to_names) ?
207
216
  onward_fks = ic.all_foreign_key_field.map(&:foreign_key)
208
- next nil if onward_fks.size == 0 or onward_fks.detect{|fk| fk.composite == composite} # Skip the back-reference
209
- # REVISIT: This far association name needs to be augmented for its role name
210
- " has_many :#{onward_fks[0].composite.rails.plural_name}, :through => :#{association_name}"
217
+ next nil if onward_fks.size == 0 or onward_fks.detect{|ofk| ofk.composite == composite} # Skip the back-reference
218
+ # This far association name needs to be augmented for its role name
219
+ # so the reverse associations still work for customised association names
220
+ source =
221
+ if composite.rails.singular_name != fk.rails.from_association_name
222
+ ", :source => :#{fk.rails.from_association_name}"
223
+ else
224
+ ''
225
+ end
226
+ " has_many :#{onward_fks[0].composite.rails.plural_name}, :through => :#{association_name}#{source}"
211
227
  end.compact
212
228
  else
213
229
  []
214
230
  end +
215
231
  [fk.mapping ? '' : nil]
216
- end
232
+ end.compact
217
233
  end
218
234
 
219
235
  def column_constraints composite
@@ -223,7 +239,9 @@ module ActiveFacts
223
239
  next unless component.path_mandatory && !component.is_a?(Metamodel::Indicator)
224
240
  next if composite.primary_index != composite.natural_index && composite.primary_index.all_index_field.detect{|ixf| ixf.component == component}
225
241
  next if component.is_a?(Metamodel::Mapping) && component.object_type.is_a?(Metamodel::ValueType) && component.is_auto_assigned
226
- [ " validates :#{component.column_name.snakecase}, :presence => true" ]
242
+ if component.all_foreign_key_field.size == 0
243
+ [ " validates :#{component.column_name.snakecase}, :presence => true" ]
244
+ end
227
245
  end.compact
228
246
  ccs.unshift("") unless ccs.empty?
229
247
  ccs
@@ -19,6 +19,7 @@ module ActiveFacts
19
19
  attr_reader :type_num # ActiveFacts::Metamodel::DataType number
20
20
  attr_reader :value # String representation of the expression
21
21
  attr_reader :is_mandatory # false if nullable
22
+ # This doesn't handle Postgres expressions, which can include a sub-table (e.g. via unnest)
22
23
  attr_reader :is_array # the expression returns an array of the specified type
23
24
 
24
25
  # Construct an expression that addresses a field from a Metamodel::Component
@@ -146,22 +146,24 @@ module ActiveFacts
146
146
  "'|'::text || " +
147
147
  expressions.map(&:to_s) * " || '|'::text || " +
148
148
  " || '|'::text",
149
- MM::DataType::TYPE_String,
150
- true
149
+ MM::DataType::TYPE_String
151
150
  )
152
151
  end
153
152
 
154
153
  # Return an expression that yields a hash of the given expression
155
154
  def hash expr, algo = 'sha1'
156
- Expression.new("digest(#{expr}, '#{algo}')", MM::DataType::TYPE_Binary, expr.is_mandatory)
155
+ Expression.new("digest(#{expr}, '#{algo}')", MM::DataType::TYPE_Binary, expr.is_mandatory, expr.is_array)
157
156
  end
158
157
 
159
158
  def truncate expr, length
160
- Expression.new("substring(#{expr} for #{length})", MM::DataType::TYPE_String, expr.is_mandatory)
159
+ Expression.new("left(#{expr}, #{length})", MM::DataType::TYPE_String, expr.is_mandatory, expr.is_array)
161
160
  end
162
161
 
163
162
  def trigram expr
164
- Expression.new("show_trgm(#{expr})", MM::DataType::TYPE_String, expr.is_mandatory, true)
163
+ # This is not a useful way to handle trigrams. Instead, create a trigram index
164
+ # over an ordinary text index value, and use a similarity search over that.
165
+ # Expression.new("show_trgm(#{expr})", MM::DataType::TYPE_String, expr.is_mandatory, true)
166
+ expr
165
167
  end
166
168
 
167
169
  # Produce a lexically-sortable decimal representation of the given numeric expression, to the overall specified length and scale
@@ -174,6 +176,39 @@ module ActiveFacts
174
176
  )
175
177
  end
176
178
 
179
+ def number_or_null expr
180
+ Expression.new(
181
+ %Q{CASE WHEN #{expr} ~ '^ *[-+]?([0-9]+[.]?[0-9]*|[.][0-9]+) *$' THEN #{expr}::numeric ELSE NULL END},
182
+ MM::DataType::TYPE_Real,
183
+ false
184
+ )
185
+ end
186
+
187
+ def split_on_separators expr, seps = ',\\\\|'
188
+ Expression.new(
189
+ %Q{regexp_split_to_table(#{expr}, E'#{seps}')},
190
+ MM::DataType::TYPE_String, true, true
191
+ )
192
+ end
193
+
194
+ # Extract separated numbers, remove non-digits, take the last 8 (removing area codes etc)
195
+ def phone_numbers expr
196
+ Expression.new(
197
+ %Q{right(#{split_on_separators(%Q{regexp_replace(#{expr}, '[^0-9]+', '', 'g')})}, 8)},
198
+ MM::DataType::TYPE_String,
199
+ true
200
+ )
201
+ end
202
+
203
+ # Extract email addresses found in the text, using a regular expression match
204
+ def email_addresses expr
205
+ Expression.new(
206
+ %Q{unnest(regexp_matches(#{expr}, E'[-_.[:alnum:]]+@[-_.[:alnum:]]+'))},
207
+ MM::DataType::TYPE_String,
208
+ true
209
+ )
210
+ end
211
+
177
212
  def lexical_date expr
178
213
  Expression.new("to_char(#{expr}, 'YYYY-MM-DD')", MM::DataType::TYPE_String, expr.is_mandatory)
179
214
  end
@@ -190,13 +225,38 @@ module ActiveFacts
190
225
  Expression.new("btrim(lower(regexp_replace(#{expr}, '[^[:alnum:]]+', ' ', 'g')))", MM::DataType::TYPE_String, expr.is_mandatory)
191
226
  end
192
227
 
228
+ def as_words expr, extra_word_chars = ''
229
+ Expression.new(
230
+ "regexp_split_to_array(lower(#{expr}), E'[^[:alnum:]#{extra_word_chars}]+')",
231
+ MM::DataType::TYPE_String, expr.is_mandatory, true
232
+ )
233
+ end
234
+
235
+ def unnest expr
236
+ Expression.new("unnest(#{expr})", MM::DataType::TYPE_String, expr.is_mandatory, true)
237
+ end
238
+
193
239
  def phonetics expr
194
- dmetaphone = "dmetaphone(#{expr})"
195
- dmetaphone_alt = "dmetaphone_alt(#{expr})"
196
- [
197
- Expression.new(dmetaphone, MM::DataType::TYPE_String, expr.is_mandatory),
198
- Expression.new("CASE WHEN #{dmetaphone} <> #{dmetaphone_alt} THEN #{dmetaphone_alt} ELSE NULL END", MM::DataType::TYPE_String, expr.is_mandatory)
199
- ]
240
+ if expr.is_array
241
+ [
242
+ Expression.new(
243
+ %Q{dmetaphone(#{expr})},
244
+ MM::DataType::TYPE_String,
245
+ expr.is_mandatory
246
+ ),
247
+ Expression.new(
248
+ %Q{dmetaphone_alt(#{expr})},
249
+ MM::DataType::TYPE_String,
250
+ expr.is_mandatory
251
+ )
252
+ ]
253
+ else
254
+ Expression.new(
255
+ %Q{unnest(ARRAY[dmetaphone(#{expr}), dmetaphone_alt(#{expr})])},
256
+ MM::DataType::TYPE_String,
257
+ expr.is_mandatory
258
+ )
259
+ end
200
260
  end
201
261
 
202
262
  # Reserved words cannot be used anywhere without quoting.
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: activefacts-compositions
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.9.18
4
+ version: 1.9.19
5
5
  platform: ruby
6
6
  authors:
7
7
  - Clifford Heath
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-01-15 00:00:00.000000000 Z
11
+ date: 2018-02-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -28,16 +28,16 @@ dependencies:
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - "~>"
31
+ - - ">"
32
32
  - !ruby/object:Gem::Version
33
- version: '10.0'
33
+ version: '10'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - "~>"
38
+ - - ">"
39
39
  - !ruby/object:Gem::Version
40
- version: '10.0'
40
+ version: '10'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: rspec
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -257,7 +257,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
257
257
  version: '0'
258
258
  requirements: []
259
259
  rubyforge_project:
260
- rubygems_version: 2.4.5
260
+ rubygems_version: 2.6.13
261
261
  signing_key:
262
262
  specification_version: 4
263
263
  summary: Create and represent composite schemas, schema transforms and data transforms