trackit_scraper 1.1.0 → 1.2.0

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
@@ -24,7 +24,7 @@ class ParallelRequestScraper
24
24
 
25
25
  until @request_ids.empty?
26
26
  request = scrape_next_request b
27
- request ? yield(request) : break
27
+ yield request
28
28
  end
29
29
 
30
30
  b.close
@@ -8,9 +8,12 @@ class RequestPage
8
8
  end
9
9
 
10
10
  def get_request
11
+ r = { id: request_id }
12
+ return r if request_not_found?
13
+
11
14
  request_info_table = RequestInfoTable.new @b.tables[1]
12
15
  request_history_table = RequestHistoryTable.new @b.tables[3]
13
- r = { id: request_id }
16
+
14
17
  r.merge! request_info_table.get_request_info
15
18
  r.merge! request_history_table.get_resolution_info
16
19
  r
@@ -22,4 +25,8 @@ class RequestPage
22
25
  @b.text[/Request #(\d+)/, 1]
23
26
  end
24
27
 
28
+ def request_not_found?
29
+ @b.text =~ /Request #\d+ not found/
30
+ end
31
+
25
32
  end
@@ -32,9 +32,14 @@ class TrackIt
32
32
 
33
33
  def scrape_requests(request_ids)
34
34
  FileUtils.mkdir_p @output_dir
35
+ request_ids = request_ids - existing_request_ids
35
36
  @parallel_request_scraper.scrape(request_ids) { |request| write_request_file request }
36
37
  end
37
38
 
39
+ def existing_request_ids
40
+ Dir["#@output_dir/*"].map { |f| File.basename(f, '.json').to_i }
41
+ end
42
+
38
43
  def write_request_file(request)
39
44
  File.write request_file_path(request), request.to_json
40
45
  print '.'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: trackit_scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-07-11 00:00:00.000000000 Z
12
+ date: 2013-07-12 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: json