trackit_scraper 1.1.0 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -24,7 +24,7 @@ class ParallelRequestScraper
24
24
 
25
25
  until @request_ids.empty?
26
26
  request = scrape_next_request b
27
- request ? yield(request) : break
27
+ yield request
28
28
  end
29
29
 
30
30
  b.close
@@ -8,9 +8,12 @@ class RequestPage
8
8
  end
9
9
 
10
10
  def get_request
11
+ r = { id: request_id }
12
+ return r if request_not_found?
13
+
11
14
  request_info_table = RequestInfoTable.new @b.tables[1]
12
15
  request_history_table = RequestHistoryTable.new @b.tables[3]
13
- r = { id: request_id }
16
+
14
17
  r.merge! request_info_table.get_request_info
15
18
  r.merge! request_history_table.get_resolution_info
16
19
  r
@@ -22,4 +25,8 @@ class RequestPage
22
25
  @b.text[/Request #(\d+)/, 1]
23
26
  end
24
27
 
28
+ def request_not_found?
29
+ @b.text =~ /Request #\d+ not found/
30
+ end
31
+
25
32
  end
@@ -32,9 +32,14 @@ class TrackIt
32
32
 
33
33
  def scrape_requests(request_ids)
34
34
  FileUtils.mkdir_p @output_dir
35
+ request_ids = request_ids - existing_request_ids
35
36
  @parallel_request_scraper.scrape(request_ids) { |request| write_request_file request }
36
37
  end
37
38
 
39
+ def existing_request_ids
40
+ Dir["#@output_dir/*"].map { |f| File.basename(f, '.json').to_i }
41
+ end
42
+
38
43
  def write_request_file(request)
39
44
  File.write request_file_path(request), request.to_json
40
45
  print '.'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: trackit_scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-07-11 00:00:00.000000000 Z
12
+ date: 2013-07-12 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: json