assimilate 0.3.5 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/assimilate.rb +5 -0
- data/lib/assimilate/batch.rb +28 -11
- data/lib/assimilate/command.rb +26 -7
- data/lib/assimilate/version.rb +1 -1
- metadata +3 -3
data/lib/assimilate.rb
CHANGED
@@ -16,7 +16,12 @@ module Assimilate
|
|
16
16
|
catalog = Catalog.new(:config => opts[:config])
|
17
17
|
batcher = catalog.start_batch(opts.merge(:filename => filename))
|
18
18
|
|
19
|
+
headers = nil
|
19
20
|
slurp(filename) do |rec|
|
21
|
+
if opts[:subset] && !headers
|
22
|
+
headers = rec.keys
|
23
|
+
batcher.prime(headers)
|
24
|
+
end
|
20
25
|
batcher << rec
|
21
26
|
end
|
22
27
|
if opts[:commit]
|
data/lib/assimilate/batch.rb
CHANGED
@@ -10,6 +10,9 @@ class Assimilate::Batch
|
|
10
10
|
@idfield = args[:idfield]
|
11
11
|
@filename = args[:filename]
|
12
12
|
|
13
|
+
@subset = args[:subset]
|
14
|
+
@suppress_deletes = args[:nodeletes]
|
15
|
+
|
13
16
|
load_baseline
|
14
17
|
|
15
18
|
@noops = []
|
@@ -19,6 +22,10 @@ class Assimilate::Batch
|
|
19
22
|
@resolved = false
|
20
23
|
end
|
21
24
|
|
25
|
+
def prime(fieldnames)
|
26
|
+
@fields = fieldnames
|
27
|
+
end
|
28
|
+
|
22
29
|
def load_baseline
|
23
30
|
stored_records = @catalog.catalog.find(@domainkey => @domain, @idfield => {"$exists" => 1}).to_a
|
24
31
|
@baseline = stored_records.each_with_object({}) do |rec, h|
|
@@ -34,7 +41,11 @@ class Assimilate::Batch
|
|
34
41
|
# Any nil values are ignored; these should not be stored but if they do appear in the catalog then don't want
|
35
42
|
# to include them when comparing new records vs. old.
|
36
43
|
def stripped_record_for(key)
|
37
|
-
|
44
|
+
if @subset
|
45
|
+
@baseline[key] && @baseline[key].select {|k,v| @fields.include?(k)}
|
46
|
+
else
|
47
|
+
@baseline[key] && @baseline[key].select {|k,v| k !~ /^_/ && !v.nil?}
|
48
|
+
end
|
38
49
|
end
|
39
50
|
|
40
51
|
def <<(record)
|
@@ -65,7 +76,7 @@ class Assimilate::Batch
|
|
65
76
|
if !@resolved
|
66
77
|
@deleted_keys = (@baseline.keys - @seen.keys).reject {|k| @baseline[k][@catalog.config[:deletion_marker]]}
|
67
78
|
|
68
|
-
@updated_field_counts = @changes.each_with_object(Hash.new(0)) do |(
|
79
|
+
@updated_field_counts = @changes.each_with_object(Hash.new(0)) do |(_,diffs),h|
|
69
80
|
# key = rec[idfield]
|
70
81
|
# diffs = deltas(stripped_record_for(key), rec)
|
71
82
|
diffs.keys.each do |f|
|
@@ -117,14 +128,18 @@ class Assimilate::Batch
|
|
117
128
|
end
|
118
129
|
|
119
130
|
def apply_deletes
|
120
|
-
@
|
121
|
-
@
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
131
|
+
unless @suppress_deletes
|
132
|
+
@deleted_keys.each do |key|
|
133
|
+
@catalog.catalog.update(
|
134
|
+
{
|
135
|
+
@domainkey => domain,
|
136
|
+
idfield => key
|
137
|
+
},
|
138
|
+
{
|
139
|
+
"$set" => {@catalog.config[:deletion_marker] => datestamp}
|
140
|
+
}
|
141
|
+
)
|
142
|
+
end
|
128
143
|
end
|
129
144
|
end
|
130
145
|
|
@@ -145,7 +160,9 @@ class Assimilate::Batch
|
|
145
160
|
@domainkey => domain,
|
146
161
|
idfield => key
|
147
162
|
},
|
148
|
-
{
|
163
|
+
{
|
164
|
+
"$set" => diffs.merge(marker => datestamp)
|
165
|
+
}
|
149
166
|
)
|
150
167
|
end
|
151
168
|
end
|
data/lib/assimilate/command.rb
CHANGED
@@ -20,6 +20,14 @@ class Assimilate::Command
|
|
20
20
|
@options[:commit] = true
|
21
21
|
end
|
22
22
|
|
23
|
+
opts.on("--subset", "Only consider the fields in the input file") do
|
24
|
+
@options[:subset] = true
|
25
|
+
end
|
26
|
+
|
27
|
+
opts.on("--nodeletes", "Do NOT delete existing records that are not present in the input") do
|
28
|
+
@options[:nodeletes] = true
|
29
|
+
end
|
30
|
+
|
23
31
|
opts.on("--key FIELDNAME", String, "(*extend* only; optional) Hash key to store extended attributes under") do |f|
|
24
32
|
@options[:key] = f
|
25
33
|
end
|
@@ -42,10 +50,12 @@ class Assimilate::Command
|
|
42
50
|
@command = argv.shift
|
43
51
|
filenames = @parser.parse(argv)
|
44
52
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
53
|
+
unless command == 'version'
|
54
|
+
raise OptionParser::MissingArgument, "missing config" unless options[:config]
|
55
|
+
raise OptionParser::MissingArgument, "missing idfield" unless options[:idfield]
|
56
|
+
raise OptionParser::MissingArgument, "missing domain" unless options[:domain]
|
57
|
+
raise "missing filename" unless filenames.any?
|
58
|
+
end
|
49
59
|
|
50
60
|
# argv remnants are filenames
|
51
61
|
[@command, @options, filenames]
|
@@ -55,6 +65,9 @@ class Assimilate::Command
|
|
55
65
|
filename = filenames.first
|
56
66
|
|
57
67
|
case command
|
68
|
+
when 'version'
|
69
|
+
puts "Assimilate #{Assimilate::VERSION}"
|
70
|
+
|
58
71
|
when 'load'
|
59
72
|
raise OptionParser::MissingArgument, "missing datestamp" unless options[:datestamp]
|
60
73
|
|
@@ -92,15 +105,21 @@ EOT
|
|
92
105
|
Unchanged records: #{results[:unchanged_count]}
|
93
106
|
New records: #{results[:adds_count]} #{idsummary(results[:new_ids])}
|
94
107
|
Deletes: #{results[:deletes_count]} #{idsummary(results[:deleted_ids])}
|
108
|
+
EOT
|
109
|
+
if options[:nodeletes]
|
110
|
+
warn "\t\t\tNO DELETIONS"
|
111
|
+
end
|
112
|
+
|
113
|
+
warn <<-EOT
|
95
114
|
Updates: #{results[:updates_count]} #{idsummary(results[:updated_ids])}
|
96
115
|
EOT
|
97
116
|
if results[:updated_fields].any?
|
98
117
|
$stderr.puts <<-EOT
|
99
|
-
|
118
|
+
Update counts by field:
|
100
119
|
EOT
|
101
|
-
results[:updated_fields].each do |k,v|
|
120
|
+
results[:updated_fields].sort.each do |k,v|
|
102
121
|
$stderr.puts <<-EOT
|
103
|
-
|
122
|
+
#{k.ljust(30, '.')}#{"%6d" % v}
|
104
123
|
EOT
|
105
124
|
end
|
106
125
|
end
|
data/lib/assimilate/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: assimilate
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.5
|
4
|
+
version: 0.4.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-05-03 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: mongo
|
@@ -178,7 +178,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
178
178
|
version: '0'
|
179
179
|
segments:
|
180
180
|
- 0
|
181
|
-
hash:
|
181
|
+
hash: 4032073337181857520
|
182
182
|
requirements: []
|
183
183
|
rubyforge_project: assimilate
|
184
184
|
rubygems_version: 1.8.24
|