activefacts-compositions 1.9.18 → 1.9.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9264168bcd4294c87277a11fde73ba06c0e490e6
4
- data.tar.gz: 40f2da7b5f7a72066b13ed7bc2de790e3717af82
3
+ metadata.gz: 2dd043b17bcbdb5bf4f525fe24022ad95a32946e
4
+ data.tar.gz: 95fb5b1b17e005cc0bd90f6f9c527e6455557a87
5
5
  SHA512:
6
- metadata.gz: ecb7fb4562dbe1e8bef8924abc26a19c2aa4816eca5d09fd609b34a060acd11bef1d72642fe32ef631a5af2c1bd82bc968fbe8c48d386c0f409d41aba624c5bc
7
- data.tar.gz: 46e7e8d45dadc6154867d2ce6ccd605bbcd897ab86f063481b8680b0fec4284ccd529edbd38fa86cabadeaad1584e82090fd89fc3f3a430da6dce47d61950106
6
+ metadata.gz: 528cbe042208d8dd290ef5cd5e11ae390261f25c04c4e5558ef111f7fc18aab8a82ea1421ee2147c71d3659c077c93af632ba7372fc337b2797f2a6c69846b30
7
+ data.tar.gz: fdf3d9610a0751f91d8b2b8fb022918aa81c31b99a73a359100622cef6f87d04338428253172921bde29e48906b25622915ea89d20fa416e0d156e8fd1695fff
@@ -20,7 +20,7 @@ Gem::Specification.new do |spec|
20
20
  spec.require_paths = ["lib"]
21
21
 
22
22
  spec.add_development_dependency "bundler", ">= 1.11"
23
- spec.add_development_dependency "rake", "~> 10.0"
23
+ spec.add_development_dependency "rake", "> 10"
24
24
  spec.add_development_dependency "rspec", "~> 3.3"
25
25
 
26
26
  spec.add_runtime_dependency "activesupport", ">= 4.2.7"
@@ -630,13 +630,8 @@ module ActiveFacts
630
630
  if absorption.foreign_key
631
631
  trace :datavault, "Setting new source composite for #{absorption.foreign_key.inspect}"
632
632
  absorption.foreign_key.source_composite = link
633
- if absorption.foreign_key.all_foreign_key_field.single
634
- fk2_component = absorption.foreign_key.all_foreign_key_field.single.component
635
- else
636
- p absorption.foreign_key
637
- debugger
638
- absorption.foreign_key.retract
639
- end
633
+ debugger unless absorption.foreign_key.all_foreign_key_field.single
634
+ fk2_component = absorption.foreign_key.all_foreign_key_field.single.component
640
635
  end
641
636
  end
642
637
 
@@ -51,6 +51,16 @@ module ActiveFacts
51
51
  @option_surrogates && composite.mapping.object_type != @loadbatch_entity_type
52
52
  end
53
53
 
54
+ def inject_surrogates
55
+ assign_groups
56
+ super
57
+ end
58
+
59
+ def assign_groups
60
+ @composites.values.each{|composite| composite.composite_group = 'base' }
61
+ loadbatch_composite.composite_group = 'batch' if @option_audit == 'batch'
62
+ end
63
+
54
64
  def generate
55
65
  create_loadbatch if @option_audit == 'batch'
56
66
  super
@@ -1,5 +1,5 @@
1
1
  module ActiveFacts
2
2
  module Compositions
3
- VERSION = "1.9.18"
3
+ VERSION = "1.9.19"
4
4
  end
5
5
  end
@@ -8,7 +8,6 @@
8
8
  require 'activefacts/metamodel'
9
9
  require 'activefacts/compositions'
10
10
  require 'activefacts/generator'
11
- require 'byebug'
12
11
 
13
12
  module ActiveFacts
14
13
  module Generators #:nodoc:
@@ -50,7 +50,7 @@ module ActiveFacts
50
50
 
51
51
  def process_options options
52
52
  @value_width = (options.delete('value_width') || 32).to_i
53
- @phonetic_confidence = (options.delete('phonetic_confidence') || 70).to_i
53
+ @phonetic_confidence = (options.delete('phonetic_confidence') || 40).to_i
54
54
 
55
55
  super
56
56
  end
@@ -236,32 +236,55 @@ module ActiveFacts
236
236
  case sm
237
237
  when 'none' # Do not index this value
238
238
  nil
239
+
239
240
  when 'simple' # Disregard white-space only
240
241
  select(composite, truncate(col_expr, @value_width), 'simple', source_field, 1.0)
241
242
 
242
- when 'alpha' # Strip white space and punctuation, just use alphabetic characters
243
- select(composite, truncate(as_alpha(col_expr), @value_width), 'alpha', source_field, 0.9)
244
-
245
- when 'words' # Break the text into words and match each word like alpha
246
- nil # REVISIT: Implement this type
247
-
248
- # when 'phrases' # Words, but where adjacent sequences of words matter
249
- when 'typo' # Use trigram similarity to detect typographic errors
250
- # REVISIT: Implement this type properly
251
- select(composite, trigram(as_alpha(col_expr)), 'typo', source_field, 0.9)
243
+ when 'alpha', # Strip white space and punctuation, just use alphabetic characters
244
+ 'typo' # Use trigram similarity to detect typographic errors, over the same values
245
+ truncated = truncate(as_alpha(col_expr), @value_width)
246
+ select(
247
+ composite, truncated, sm, source_field,
248
+ "CASE WHEN #{truncated} = #{col_expr} THEN 1.0 ELSE 0.95 END" # Maybe exact match.
249
+ )
252
250
 
253
251
  when 'phonetic' # Use phonetic matching as well as trigrams
254
- phonetics(col_expr).map do |p|
255
- select(composite, p, 'phonetic', source_field, @phonetic_confidence/100.0)
256
- end
252
+ search_expr(composite, intrinsic_type, col_expr, ['typo'], source_field) <<
253
+ select(composite, phonetics(col_expr), 'phonetic', source_field, @phonetic_confidence/100.0, true)
254
+
255
+ when 'words' # Break the text into words and match each word like alpha
256
+ truncated = truncate(unnest(as_words(col_expr)), @value_width)
257
+ select(composite, truncated, sm, source_field, 0.90, true)
257
258
 
258
259
  when 'names' # Break the text into words and match each word like phonetic
259
- nil # REVISIT: Implement this type
260
+ truncated = truncate(unnest(as_words(col_expr, "''-")), @value_width) # N.B. ' is doubled for SQL
261
+ search_expr(composite, intrinsic_type, col_expr, ['words'], source_field) <<
262
+ phonetics(truncated).map do |phonetic|
263
+ select(composite, phonetic, 'names', source_field, @phonetic_confidence/100.0, true)
264
+ end
260
265
 
261
266
  when 'text' # Index a large text field using significant words and phrases
262
267
  nil # REVISIT: Implement this type
268
+
269
+ when 'number' # Cast to number and back to text to canonicalise the value;
270
+ # If the number doesn't match this regexp, we don't index it.
271
+ # This doesn't handle all valid Postgres numeric literals (e.g. 2.3e-4)
272
+ select(composite, col_expr, 'number', source_field, number_or_null(col_expr))
273
+
274
+ when 'phone' # Phone numbers; split, strip each to digits, take the last 8 of each
275
+ select(composite, phone_numbers(col_expr), 'phone', source_field, 1)
276
+
277
+ when 'email' # Use a regexp to find email addresses in this field
278
+ select(composite, truncate(email_addresses(col_expr), @value_width), 'email', source_field, 1)
279
+
280
+ when 'date' # Convert string to standard date format if it looks like a date, NULL otherwise
281
+ select(
282
+ composite, col_expr, 'date', source_field, 1,
283
+ %Q{CASE WHEN #{col_expr} ~ '^ *[0-9]+[.]?[0-9]*|[.][0-9]+) *$' THEN (#{col_expr}::numeric):text ELSE NULL END}
284
+ )
285
+
263
286
  else
264
- raise "Unknown search method #{sm}"
287
+ $stderrs.puts "Unknown search method #{sm}"
265
288
  end
266
289
  end
267
290
 
@@ -273,6 +296,7 @@ module ActiveFacts
273
296
  MM::DataType::TYPE_Decimal,
274
297
  MM::DataType::TYPE_Money
275
298
  # Produce a right-justified value
299
+ # REVISIT: This is a dumb thing to do.
276
300
  select(composite, lexical_decimal(col_expr, @value_width, value_type.scale), 'simple', source_field, 1)
277
301
 
278
302
  when MM::DataType::TYPE_Date
@@ -301,7 +325,7 @@ module ActiveFacts
301
325
  name.words.send(@column_case)*@column_joiner
302
326
  end
303
327
 
304
- def select composite, expression, processing, source_field, confidence = 1, where = []
328
+ def select composite, expression, processing, source_field, confidence = 1, distinct = false, where = []
305
329
  # These fields are in order of index precedence, to co-locate
306
330
  # comparable values regardless of source record type or column
307
331
  where << 'Value IS NOT NULL' if expression.to_s =~ /\bNULL\b/
@@ -313,17 +337,16 @@ module ActiveFacts
313
337
  source_table_name = stylise_column_name("SourceTable")
314
338
  source_field_name = stylise_column_name("SourceField")
315
339
  expression_text = expression.to_s
316
- expression_text = "ARRAY[#{expression_text}]" unless expression.is_array
317
340
  select = %Q{
318
- SELECT '#{processing}' AS #{processing_name},
341
+ SELECT#{distinct ? ' DISTINCT' : ''}
342
+ '#{processing}' AS #{processing_name},
319
343
  #{expression_text} AS #{value_name},
320
344
  #{load_batch_id_name},
321
- #{"%.2f" % confidence} AS #{confidence_name},
345
+ #{confidence} AS #{confidence_name},
322
346
  #{record_guid_name},
323
347
  '#{safe_table_name(composite)}' AS #{source_table_name},
324
348
  '#{source_field}' AS #{source_field_name}
325
- FROM #{safe_table_name(composite)}
326
- WHERE COALESCE(#{expression},'') <> ''}.
349
+ FROM #{safe_table_name(composite)}}.
327
350
  unindent
328
351
 
329
352
  if where.empty?
@@ -16,6 +16,7 @@ module ActiveFacts
16
16
  HEADER = "# Auto-generated from CQL, edits will be lost"
17
17
  def self.options
18
18
  ({
19
+ keep: ['Boolean', "Keep stale model files"],
19
20
  output: [String, "Overwrite model files into this output directory"],
20
21
  concern: [String, "Namespace for the concerns"],
21
22
  validation: ['Boolean', "Disable generation of validations"],
@@ -25,6 +26,7 @@ module ActiveFacts
25
26
  def initialize composition, options = {}
26
27
  @composition = composition
27
28
  @options = options
29
+ @option_keep = options.delete("keep")
28
30
  @option_output = options.delete("output")
29
31
  @option_concern = options.delete("concern")
30
32
  @option_validations = options.include?('validations') ? options.delete("validations") : true
@@ -35,7 +37,7 @@ module ActiveFacts
35
37
  end
36
38
 
37
39
  def generate
38
- list_extant_files if @option_output
40
+ list_extant_files if @option_output && !@option_keep
39
41
 
40
42
  @ok = true
41
43
  models =
@@ -46,7 +48,7 @@ module ActiveFacts
46
48
  compact*"\n"
47
49
 
48
50
  warn "\# #{@composition.name} generated with errors" unless @ok
49
- delete_old_generated_files if @option_output
51
+ delete_old_generated_files if @option_output && !@option_keep
50
52
 
51
53
  models
52
54
  end
@@ -158,6 +160,7 @@ module ActiveFacts
158
160
  composite.all_foreign_key_as_source_composite.
159
161
  sort_by{ |fk| fk.all_foreign_key_field.map(&:component).flat_map(&:path).map(&:rank_key) }.
160
162
  flat_map do |fk|
163
+ next nil if fk.all_foreign_key_field.size > 1
161
164
  association_name = fk.rails.from_association_name
162
165
 
163
166
  if association_name != fk.composite.rails.singular_name
@@ -171,9 +174,14 @@ module ActiveFacts
171
174
  foreign_key = ''
172
175
  end
173
176
 
177
+ single_fk_field = fk.all_foreign_key_field.single.component
178
+ if !single_fk_field.path_mandatory
179
+ optional = ", :optional => true"
180
+ end
181
+
174
182
  [
175
183
  fk.mapping ? " \# #{fk.mapping.comment}" : nil,
176
- " belongs_to :#{association_name}#{class_name}#{foreign_key}",
184
+ " belongs_to :#{association_name}#{class_name}#{foreign_key}#{optional}",
177
185
  fk.mapping ? '' : nil,
178
186
  ]
179
187
  end.compact
@@ -184,9 +192,10 @@ module ActiveFacts
184
192
  composite.all_foreign_key_as_target_composite.
185
193
  sort_by{ |fk| fk.all_foreign_key_field.map(&:component).flat_map(&:path).map(&:rank_key) }.
186
194
  flat_map do |fk|
195
+ next nil if fk.all_foreign_key_field.size > 1
187
196
 
188
197
  if fk.all_foreign_key_field.size > 1
189
- raise "Can't emit Rails associations for multi-part foreign key with #{fk.references.inspect}. Did you mean to use --surrogate?"
198
+ raise "Can't emit Rails associations for multi-part foreign key with #{fk.all_foreign_key_field.inspect}. Did you mean to use --surrogate?"
190
199
  end
191
200
 
192
201
  association_type, association_name = *fk.rails.to_association
@@ -205,15 +214,22 @@ module ActiveFacts
205
214
  fk.source_composite.primary_index.all_index_field.map(&:component).flat_map do |ic|
206
215
  next nil if ic.is_a?(MM::Indicator) # or use rails.plural_name(ic.references[0].to_names) ?
207
216
  onward_fks = ic.all_foreign_key_field.map(&:foreign_key)
208
- next nil if onward_fks.size == 0 or onward_fks.detect{|fk| fk.composite == composite} # Skip the back-reference
209
- # REVISIT: This far association name needs to be augmented for its role name
210
- " has_many :#{onward_fks[0].composite.rails.plural_name}, :through => :#{association_name}"
217
+ next nil if onward_fks.size == 0 or onward_fks.detect{|ofk| ofk.composite == composite} # Skip the back-reference
218
+ # This far association name needs to be augmented for its role name
219
+ # so the reverse associations still work for customised association names
220
+ source =
221
+ if composite.rails.singular_name != fk.rails.from_association_name
222
+ ", :source => :#{fk.rails.from_association_name}"
223
+ else
224
+ ''
225
+ end
226
+ " has_many :#{onward_fks[0].composite.rails.plural_name}, :through => :#{association_name}#{source}"
211
227
  end.compact
212
228
  else
213
229
  []
214
230
  end +
215
231
  [fk.mapping ? '' : nil]
216
- end
232
+ end.compact
217
233
  end
218
234
 
219
235
  def column_constraints composite
@@ -223,7 +239,9 @@ module ActiveFacts
223
239
  next unless component.path_mandatory && !component.is_a?(Metamodel::Indicator)
224
240
  next if composite.primary_index != composite.natural_index && composite.primary_index.all_index_field.detect{|ixf| ixf.component == component}
225
241
  next if component.is_a?(Metamodel::Mapping) && component.object_type.is_a?(Metamodel::ValueType) && component.is_auto_assigned
226
- [ " validates :#{component.column_name.snakecase}, :presence => true" ]
242
+ if component.all_foreign_key_field.size == 0
243
+ [ " validates :#{component.column_name.snakecase}, :presence => true" ]
244
+ end
227
245
  end.compact
228
246
  ccs.unshift("") unless ccs.empty?
229
247
  ccs
@@ -19,6 +19,7 @@ module ActiveFacts
19
19
  attr_reader :type_num # ActiveFacts::Metamodel::DataType number
20
20
  attr_reader :value # String representation of the expression
21
21
  attr_reader :is_mandatory # false if nullable
22
+ # This doesn't handle Postgres expressions, which can include a sub-table (e.g. via unnest)
22
23
  attr_reader :is_array # the expression returns an array of the specified type
23
24
 
24
25
  # Construct an expression that addresses a field from a Metamodel::Component
@@ -146,22 +146,24 @@ module ActiveFacts
146
146
  "'|'::text || " +
147
147
  expressions.map(&:to_s) * " || '|'::text || " +
148
148
  " || '|'::text",
149
- MM::DataType::TYPE_String,
150
- true
149
+ MM::DataType::TYPE_String
151
150
  )
152
151
  end
153
152
 
154
153
  # Return an expression that yields a hash of the given expression
155
154
  def hash expr, algo = 'sha1'
156
- Expression.new("digest(#{expr}, '#{algo}')", MM::DataType::TYPE_Binary, expr.is_mandatory)
155
+ Expression.new("digest(#{expr}, '#{algo}')", MM::DataType::TYPE_Binary, expr.is_mandatory, expr.is_array)
157
156
  end
158
157
 
159
158
  def truncate expr, length
160
- Expression.new("substring(#{expr} for #{length})", MM::DataType::TYPE_String, expr.is_mandatory)
159
+ Expression.new("left(#{expr}, #{length})", MM::DataType::TYPE_String, expr.is_mandatory, expr.is_array)
161
160
  end
162
161
 
163
162
  def trigram expr
164
- Expression.new("show_trgm(#{expr})", MM::DataType::TYPE_String, expr.is_mandatory, true)
163
+ # This is not a useful way to handle trigrams. Instead, create a trigram index
164
+ # over an ordinary text index value, and use a similarity search over that.
165
+ # Expression.new("show_trgm(#{expr})", MM::DataType::TYPE_String, expr.is_mandatory, true)
166
+ expr
165
167
  end
166
168
 
167
169
  # Produce a lexically-sortable decimal representation of the given numeric expression, to the overall specified length and scale
@@ -174,6 +176,39 @@ module ActiveFacts
174
176
  )
175
177
  end
176
178
 
179
+ def number_or_null expr
180
+ Expression.new(
181
+ %Q{CASE WHEN #{expr} ~ '^ *[-+]?([0-9]+[.]?[0-9]*|[.][0-9]+) *$' THEN #{expr}::numeric ELSE NULL END},
182
+ MM::DataType::TYPE_Real,
183
+ false
184
+ )
185
+ end
186
+
187
+ def split_on_separators expr, seps = ',\\\\|'
188
+ Expression.new(
189
+ %Q{regexp_split_to_table(#{expr}, E'#{seps}')},
190
+ MM::DataType::TYPE_String, true, true
191
+ )
192
+ end
193
+
194
+ # Extract separated numbers, remove non-digits, take the last 8 (removing area codes etc)
195
+ def phone_numbers expr
196
+ Expression.new(
197
+ %Q{right(#{split_on_separators(%Q{regexp_replace(#{expr}, '[^0-9]+', '', 'g')})}, 8)},
198
+ MM::DataType::TYPE_String,
199
+ true
200
+ )
201
+ end
202
+
203
+ # Extract separated numbers, remove non-digits, take the last 8 (removing area codes etc)
204
+ def email_addresses expr
205
+ Expression.new(
206
+ %Q{unnest(regexp_matches(#{expr}, E'[-_.[:alnum:]]+@[-_.[:alnum:]]+'))},
207
+ MM::DataType::TYPE_String,
208
+ true
209
+ )
210
+ end
211
+
177
212
  def lexical_date expr
178
213
  Expression.new("to_char(#{expr}, 'YYYY-MM-DD')", MM::DataType::TYPE_String, expr.is_mandatory)
179
214
  end
@@ -190,13 +225,38 @@ module ActiveFacts
190
225
  Expression.new("btrim(lower(regexp_replace(#{expr}, '[^[:alnum:]]+', ' ', 'g')))", MM::DataType::TYPE_String, expr.is_mandatory)
191
226
  end
192
227
 
228
+ def as_words expr, extra_word_chars = ''
229
+ Expression.new(
230
+ "regexp_split_to_array(lower(#{expr}), E'[^[:alnum:]#{extra_word_chars}]+')",
231
+ MM::DataType::TYPE_String, expr.is_mandatory, true
232
+ )
233
+ end
234
+
235
+ def unnest expr
236
+ Expression.new("unnest(#{expr})", MM::DataType::TYPE_String, expr.is_mandatory, true)
237
+ end
238
+
193
239
  def phonetics expr
194
- dmetaphone = "dmetaphone(#{expr})"
195
- dmetaphone_alt = "dmetaphone_alt(#{expr})"
196
- [
197
- Expression.new(dmetaphone, MM::DataType::TYPE_String, expr.is_mandatory),
198
- Expression.new("CASE WHEN #{dmetaphone} <> #{dmetaphone_alt} THEN #{dmetaphone_alt} ELSE NULL END", MM::DataType::TYPE_String, expr.is_mandatory)
199
- ]
240
+ if expr.is_array
241
+ [
242
+ Expression.new(
243
+ %Q{dmetaphone(#{expr})},
244
+ MM::DataType::TYPE_String,
245
+ expr.is_mandatory
246
+ ),
247
+ Expression.new(
248
+ %Q{dmetaphone_alt(#{expr})},
249
+ MM::DataType::TYPE_String,
250
+ expr.is_mandatory
251
+ )
252
+ ]
253
+ else
254
+ Expression.new(
255
+ %Q{unnest(ARRAY[dmetaphone(#{expr}), dmetaphone_alt(#{expr})])},
256
+ MM::DataType::TYPE_String,
257
+ expr.is_mandatory
258
+ )
259
+ end
200
260
  end
201
261
 
202
262
  # Reserved words cannot be used anywhere without quoting.
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: activefacts-compositions
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.9.18
4
+ version: 1.9.19
5
5
  platform: ruby
6
6
  authors:
7
7
  - Clifford Heath
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-01-15 00:00:00.000000000 Z
11
+ date: 2018-02-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -28,16 +28,16 @@ dependencies:
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - "~>"
31
+ - - ">"
32
32
  - !ruby/object:Gem::Version
33
- version: '10.0'
33
+ version: '10'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - "~>"
38
+ - - ">"
39
39
  - !ruby/object:Gem::Version
40
- version: '10.0'
40
+ version: '10'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: rspec
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -257,7 +257,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
257
257
  version: '0'
258
258
  requirements: []
259
259
  rubyforge_project:
260
- rubygems_version: 2.4.5
260
+ rubygems_version: 2.6.13
261
261
  signing_key:
262
262
  specification_version: 4
263
263
  summary: Create and represent composite schemas, schema transforms and data transforms