assimilate 0.3.5 → 0.4.1

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
data/lib/assimilate.rb CHANGED
@@ -16,7 +16,12 @@ module Assimilate
16
16
  catalog = Catalog.new(:config => opts[:config])
17
17
  batcher = catalog.start_batch(opts.merge(:filename => filename))
18
18
 
19
+ headers = nil
19
20
  slurp(filename) do |rec|
21
+ if opts[:subset] && !headers
22
+ headers = rec.keys
23
+ batcher.prime(headers)
24
+ end
20
25
  batcher << rec
21
26
  end
22
27
  if opts[:commit]
@@ -10,6 +10,9 @@ class Assimilate::Batch
10
10
  @idfield = args[:idfield]
11
11
  @filename = args[:filename]
12
12
 
13
+ @subset = args[:subset]
14
+ @suppress_deletes = args[:nodeletes]
15
+
13
16
  load_baseline
14
17
 
15
18
  @noops = []
@@ -19,6 +22,10 @@ class Assimilate::Batch
19
22
  @resolved = false
20
23
  end
21
24
 
25
+ def prime(fieldnames)
26
+ @fields = fieldnames
27
+ end
28
+
22
29
  def load_baseline
23
30
  stored_records = @catalog.catalog.find(@domainkey => @domain, @idfield => {"$exists" => 1}).to_a
24
31
  @baseline = stored_records.each_with_object({}) do |rec, h|
@@ -34,7 +41,11 @@ class Assimilate::Batch
34
41
  # Any nil values are ignored; these should not be stored but if they do appear in the catalog then don't want
35
42
  # to include them when comparing new records vs. old.
36
43
  def stripped_record_for(key)
37
- @baseline[key] && @baseline[key].select {|k,v| k !~ /^_/ && !v.nil?}
44
+ if @subset
45
+ @baseline[key] && @baseline[key].select {|k,v| @fields.include?(k)}
46
+ else
47
+ @baseline[key] && @baseline[key].select {|k,v| k !~ /^_/ && !v.nil?}
48
+ end
38
49
  end
39
50
 
40
51
  def <<(record)
@@ -65,7 +76,7 @@ class Assimilate::Batch
65
76
  if !@resolved
66
77
  @deleted_keys = (@baseline.keys - @seen.keys).reject {|k| @baseline[k][@catalog.config[:deletion_marker]]}
67
78
 
68
- @updated_field_counts = @changes.each_with_object(Hash.new(0)) do |(key,diffs),h|
79
+ @updated_field_counts = @changes.each_with_object(Hash.new(0)) do |(_,diffs),h|
69
80
  # key = rec[idfield]
70
81
  # diffs = deltas(stripped_record_for(key), rec)
71
82
  diffs.keys.each do |f|
@@ -117,14 +128,18 @@ class Assimilate::Batch
117
128
  end
118
129
 
119
130
  def apply_deletes
120
- @deleted_keys.each do |key|
121
- @catalog.catalog.update(
122
- {
123
- @domainkey => domain,
124
- idfield => key
125
- },
126
- {"$set" => {@catalog.config[:deletion_marker] => datestamp}}
127
- )
131
+ unless @suppress_deletes
132
+ @deleted_keys.each do |key|
133
+ @catalog.catalog.update(
134
+ {
135
+ @domainkey => domain,
136
+ idfield => key
137
+ },
138
+ {
139
+ "$set" => {@catalog.config[:deletion_marker] => datestamp}
140
+ }
141
+ )
142
+ end
128
143
  end
129
144
  end
130
145
 
@@ -145,7 +160,9 @@ class Assimilate::Batch
145
160
  @domainkey => domain,
146
161
  idfield => key
147
162
  },
148
- {"$set" => diffs.merge(marker => datestamp)}
163
+ {
164
+ "$set" => diffs.merge(marker => datestamp)
165
+ }
149
166
  )
150
167
  end
151
168
  end
@@ -20,6 +20,14 @@ class Assimilate::Command
20
20
  @options[:commit] = true
21
21
  end
22
22
 
23
+ opts.on("--subset", "Only consider the fields in the input file") do
24
+ @options[:subset] = true
25
+ end
26
+
27
+ opts.on("--nodeletes", "Do NOT delete existing records that are not present in the input") do
28
+ @options[:nodeletes] = true
29
+ end
30
+
23
31
  opts.on("--key FIELDNAME", String, "(*extend* only; optional) Hash key to store extended attributes under") do |f|
24
32
  @options[:key] = f
25
33
  end
@@ -42,10 +50,12 @@ class Assimilate::Command
42
50
  @command = argv.shift
43
51
  filenames = @parser.parse(argv)
44
52
 
45
- raise OptionParser::MissingArgument, "missing config" unless options[:config]
46
- raise OptionParser::MissingArgument, "missing idfield" unless options[:idfield]
47
- raise OptionParser::MissingArgument, "missing domain" unless options[:domain]
48
- raise "missing filename" unless filenames.any?
53
+ unless command == 'version'
54
+ raise OptionParser::MissingArgument, "missing config" unless options[:config]
55
+ raise OptionParser::MissingArgument, "missing idfield" unless options[:idfield]
56
+ raise OptionParser::MissingArgument, "missing domain" unless options[:domain]
57
+ raise "missing filename" unless filenames.any?
58
+ end
49
59
 
50
60
  # argv remnants are filenames
51
61
  [@command, @options, filenames]
@@ -55,6 +65,9 @@ class Assimilate::Command
55
65
  filename = filenames.first
56
66
 
57
67
  case command
68
+ when 'version'
69
+ puts "Assimilate #{Assimilate::VERSION}"
70
+
58
71
  when 'load'
59
72
  raise OptionParser::MissingArgument, "missing datestamp" unless options[:datestamp]
60
73
 
@@ -92,15 +105,21 @@ EOT
92
105
  Unchanged records: #{results[:unchanged_count]}
93
106
  New records: #{results[:adds_count]} #{idsummary(results[:new_ids])}
94
107
  Deletes: #{results[:deletes_count]} #{idsummary(results[:deleted_ids])}
108
+ EOT
109
+ if options[:nodeletes]
110
+ warn "\t\t\tNO DELETIONS"
111
+ end
112
+
113
+ warn <<-EOT
95
114
  Updates: #{results[:updates_count]} #{idsummary(results[:updated_ids])}
96
115
  EOT
97
116
  if results[:updated_fields].any?
98
117
  $stderr.puts <<-EOT
99
- Counts by field:
118
+ Update counts by field:
100
119
  EOT
101
- results[:updated_fields].each do |k,v|
120
+ results[:updated_fields].sort.each do |k,v|
102
121
  $stderr.puts <<-EOT
103
- #{k}: #{v}
122
+ #{k.ljust(30, '.')}#{"%6d" % v}
104
123
  EOT
105
124
  end
106
125
  end
@@ -1,3 +1,3 @@
1
1
  module Assimilate
2
- VERSION = "0.3.5"
2
+ VERSION = "0.4.1"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: assimilate
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.5
4
+ version: 0.4.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-01-10 00:00:00.000000000 Z
12
+ date: 2013-05-03 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mongo
@@ -178,7 +178,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
178
178
  version: '0'
179
179
  segments:
180
180
  - 0
181
- hash: 3776853293402426240
181
+ hash: 4032073337181857520
182
182
  requirements: []
183
183
  rubyforge_project: assimilate
184
184
  rubygems_version: 1.8.24