pupa 0.0.4 → 0.0.5

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b9732b4a0ea1dbd2eb9d8fff1d533bbfd397db6a
4
- data.tar.gz: 659874bfd458daa7c09be7f0f2a5feeb9deec005
3
+ metadata.gz: 796eaf63778076cbe361cae74186cdef026f88b2
4
+ data.tar.gz: d90a3bccc968f958b137254817c548e7b1b11a5b
5
5
  SHA512:
6
- metadata.gz: f24268480ad0447df95bbfa82a86c13d4d82e015309932571b102b73813d480e52d35124ca900f94d9740d58d1b8f3f84e2b9b484dc8c3b879ab6efec8593683
7
- data.tar.gz: e09836eb51002368554645177bbfbee70e91b3dd5900746e26aa10f3196adb5651e455b1d3a87617e325381169367debc5af0b3d3f2ace604aa8d517c8fc585a
6
+ metadata.gz: 5a42c57c20190b0678e60bb17832525b911e4f33253212be5dbf27475816f567e23f98be870173793bd11681d510ae760912cacbb1916c671c81bad75ad5ab81
7
+ data.tar.gz: b307018aecc3d92fa976c9c3a6c2a7fafb548a4a3a57aa3b2a6aa5c4c7fd029c64ebfb9de6947e6202dc223ca3806893e921146188a9a839c3c7993711b4053d
@@ -32,7 +32,8 @@ module Pupa
32
32
  # Declare the class' properties.
33
33
  #
34
34
  # When converting an object to a hash using the `to_h` method, only the
35
- # properties declared with `attr_accessor` will be included in the hash.
35
+ # properties declared with `attr_accessor` or `attr_reader` will be
36
+ # included in the hash.
36
37
  #
37
38
  # @param [Array<Symbol>] the class' properties
38
39
  def attr_accessor(*attributes)
@@ -40,6 +41,18 @@ module Pupa
40
41
  super
41
42
  end
42
43
 
44
+ # Declare the class' properties.
45
+ #
46
+ # When converting an object to a hash using the `to_h` method, only the
47
+ # properties declared with `attr_accessor` or `attr_reader` will be
48
+ # included in the hash.
49
+ #
50
+ # @param [Array<Symbol>] the class' properties
51
+ def attr_reader(*attributes)
52
+ self.properties += attributes # use assignment to not overwrite the parent's attribute
53
+ super
54
+ end
55
+
43
56
  # Declare the class' foreign keys.
44
57
  #
45
58
  # When importing scraped objects, the foreign keys will be used to draw a
@@ -17,6 +17,8 @@ module Pupa
17
17
  class_attribute :tasks
18
18
  self.tasks = []
19
19
 
20
+ attr_reader :report
21
+
20
22
  def_delegators :@logger, :debug, :info, :warn, :error, :fatal
21
23
 
22
24
  # @param [String] output_dir the directory in which to dump JSON documents
@@ -31,6 +33,7 @@ module Pupa
31
33
  @level = level
32
34
  @logger = Logger.new('pupa', level: level, logdev: logdev)
33
35
  @client = Client.new(cache_dir: cache_dir, expires_in: expires_in, level: level)
36
+ @report = {}
34
37
  end
35
38
 
36
39
  # Retrieves and parses a document with a GET request.
@@ -100,10 +103,14 @@ module Pupa
100
103
  # Dumps scraped objects to disk.
101
104
  #
102
105
  # @param [Symbol] task_name the name of the scraping task to perform
106
+ # @return [Integer] the number of scraped objects
103
107
  def dump_scraped_objects(task_name)
108
+ count = 0
104
109
  send(task_name).each do |object|
110
+ count += 1 # we don't know the size of the enumeration
105
111
  dump_scraped_object(object)
106
112
  end
113
+ count
107
114
  end
108
115
 
109
116
  # Saves scraped objects to a database.
@@ -114,6 +121,8 @@ module Pupa
114
121
  # @raises [Pupa::Errors::DuplicateDocumentError] if duplicate objects were
115
122
  # inadvertently saved to the database
116
123
  def import
124
+ @report[:import] = {}
125
+
117
126
  objects = deduplicate(load_scraped_objects)
118
127
 
119
128
  object_id_to_database_id = {}
@@ -126,7 +135,7 @@ module Pupa
126
135
  object = objects[id]
127
136
  resolve_foreign_keys(object, object_id_to_database_id)
128
137
  # The dependency graph strategy only works if there are no foreign objects.
129
- object_id_to_database_id[id] = Persistence.new(object).save
138
+ object_id_to_database_id[id] = import_object(object)
130
139
  end
131
140
  else
132
141
  size = objects.size
@@ -158,7 +167,7 @@ module Pupa
158
167
  progress_made = true
159
168
  resolve_foreign_keys(object, object_id_to_database_id)
160
169
  resolve_foreign_objects(object)
161
- object_id_to_database_id[id] = Persistence.new(object).save
170
+ object_id_to_database_id[id] = import_object(object)
162
171
  end
163
172
  end
164
173
 
@@ -204,7 +213,7 @@ module Pupa
204
213
  # @raises [Pupa::Errors::DuplicateObjectIdError]
205
214
  def dump_scraped_object(object)
206
215
  type = object.class.to_s.demodulize.underscore
207
- basename = "#{type}_#{object._id}.json"
216
+ basename = "#{type}_#{object._id.gsub(File::SEPARATOR, '_')}.json"
208
217
  path = File.join(@output_dir, basename)
209
218
 
210
219
  if File.exist?(path)
@@ -336,7 +345,7 @@ module Pupa
336
345
 
337
346
  # Resolves an object's foreign objects to database IDs.
338
347
  #
339
- # @param [Object] an object
348
+ # @param [Object] object an object
340
349
  # @raises [Pupa::Errors::MissingDatabaseIdError]
341
350
  def resolve_foreign_objects(object)
342
351
  object.foreign_objects.each do |property|
@@ -347,5 +356,17 @@ module Pupa
347
356
  end
348
357
  end
349
358
  end
359
+
360
+ # @param [Object] object an object
361
+ def import_object(object)
362
+ id = Persistence.new(object).save
363
+ @report[:import][object._type] ||= Hash.new(0)
364
+ if id == object._id
365
+ @report[:import][object._type][:insert] += 1
366
+ else
367
+ @report[:import][object._type][:update] += 1
368
+ end
369
+ id
370
+ end
350
371
  end
351
372
  end
data/lib/pupa/runner.rb CHANGED
@@ -162,6 +162,15 @@ module Pupa
162
162
 
163
163
  exit if options.dry_run
164
164
 
165
+ report = {
166
+ plan: {
167
+ processor: @processor_class,
168
+ arguments: options.to_h,
169
+ options: rest,
170
+ },
171
+ start: Time.now.utc,
172
+ }
173
+
165
174
  Pupa.session = Moped::Session.new([options.host_with_port], database: options.database)
166
175
 
167
176
  if options.actions.delete('scrape')
@@ -172,14 +181,22 @@ module Pupa
172
181
  FileUtils.rm(path)
173
182
  end
174
183
 
184
+ report[:scrape] = {}
175
185
  options.tasks.each do |task_name|
176
- processor.dump_scraped_objects(task_name)
186
+ report[:scrape][task_name] = processor.dump_scraped_objects(task_name)
177
187
  end
178
188
  end
179
189
 
180
190
  options.actions.each do |action|
181
191
  processor.send(action)
192
+ if processor.report.key?(action.to_sym)
193
+ report.update(action.to_sym => processor.report[action.to_sym])
194
+ end
182
195
  end
196
+
197
+ report[:end] = Time.now.utc
198
+ report[:time] = report[:end] - report[:start]
199
+ puts JSON.dump(report)
183
200
  end
184
201
  end
185
202
  end
data/lib/pupa/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Pupa
2
- VERSION = "0.0.4"
2
+ VERSION = "0.0.5"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pupa
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Open North
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-09-16 00:00:00.000000000 Z
11
+ date: 2013-09-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport