trackit_scraper 2.0.0 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,22 +1,22 @@
1
- class LoginCommand
2
-
3
- def initialize(username, password, navigator)
4
- @username = username
5
- @password = password
6
- @navigator = navigator
7
- end
8
-
9
- def execute(b)
10
- @navigator.goto 'hd/index.ssp', b
11
- login b
12
- end
13
-
14
- private
15
-
16
- def login(b)
17
- b.text_field(name: 'user_id').set @username
18
- b.text_field(name: 'user_pwd').set @password
19
- b.button(value: 'Log on').click
20
- end
21
-
1
+ class LoginCommand
2
+
3
+ def initialize(username, password, navigator)
4
+ @username = username
5
+ @password = password
6
+ @navigator = navigator
7
+ end
8
+
9
+ def execute(b)
10
+ @navigator.goto 'hd/index.ssp', b
11
+ login b
12
+ end
13
+
14
+ private
15
+
16
+ def login(b)
17
+ b.text_field(name: 'user_id').set @username
18
+ b.text_field(name: 'user_pwd').set @password
19
+ b.button(value: 'Log on').click
20
+ end
21
+
22
22
  end
@@ -1,12 +1,12 @@
1
- class Navigator
2
-
3
- def initialize(base_url)
4
- @base_url = base_url
5
- end
6
-
7
- def goto(relative_url, b)
8
- url = File.join @base_url, relative_url
9
- b.goto url
10
- end
11
-
1
+ class Navigator
2
+
3
+ def initialize(base_url)
4
+ @base_url = base_url
5
+ end
6
+
7
+ def goto(relative_url, b)
8
+ url = File.join @base_url, relative_url
9
+ b.goto url
10
+ end
11
+
12
12
  end
@@ -1,45 +1,45 @@
1
- require 'thread'
2
- require 'watir-webdriver'
3
- require_relative 'request_page'
4
-
5
- class ParallelRequestScraper
6
-
7
- def initialize(navigator, login_command, threads)
8
- @navigator = navigator
9
- @login_command = login_command
10
- @threads = threads
11
- end
12
-
13
- def scrape(request_ids, &block)
14
- @request_ids = request_ids.clone
15
- @semaphore = Mutex.new
16
- @threads.times.map { Thread.new { scrape_requests &block } }.each { |t| t.join }
17
- end
18
-
19
- private
20
-
21
- def scrape_requests
22
- b = Watir::Browser.new
23
- @login_command.execute b
24
-
25
- until @request_ids.empty?
26
- request = scrape_next_request b
27
- yield request
28
- end
29
-
30
- b.close
31
- end
32
-
33
- def scrape_next_request(b)
34
- id = next_request_id
35
- return unless id
36
- @navigator.goto "hd/ticket/euTicketView.ssp?ticket_id=#{id}&log=show", b
37
- request_page = RequestPage.new b
38
- request_page.get_request
39
- end
40
-
41
- def next_request_id
42
- @semaphore.synchronize { return @request_ids.pop }
43
- end
44
-
1
+ require 'thread'
2
+ require 'watir-webdriver'
3
+ require_relative 'request_page'
4
+
5
+ class ParallelRequestScraper
6
+
7
+ def initialize(navigator, login_command, threads)
8
+ @navigator = navigator
9
+ @login_command = login_command
10
+ @threads = threads
11
+ end
12
+
13
+ def scrape(request_ids, &block)
14
+ @request_ids = request_ids.clone
15
+ @semaphore = Mutex.new
16
+ @threads.times.map { Thread.new { scrape_requests &block } }.each { |t| t.join }
17
+ end
18
+
19
+ private
20
+
21
+ def scrape_requests
22
+ b = Watir::Browser.new
23
+ @login_command.execute b
24
+
25
+ until @request_ids.empty?
26
+ request = scrape_next_request b
27
+ yield request if request
28
+ end
29
+
30
+ b.close
31
+ end
32
+
33
+ def scrape_next_request(b)
34
+ id = next_request_id
35
+ return unless id
36
+ @navigator.goto "hd/ticket/euTicketView.ssp?ticket_id=#{id}&log=show", b
37
+ request_page = RequestPage.new b
38
+ request_page.get_request
39
+ end
40
+
41
+ def next_request_id
42
+ @semaphore.synchronize { return @request_ids.pop }
43
+ end
44
+
45
45
  end
@@ -1,46 +1,46 @@
1
- require 'time'
2
- require 'watir-webdriver'
3
-
4
- class RequestFinder
5
-
6
- def initialize(navigator)
7
- @navigator = navigator
8
- end
9
-
10
- def get_request_ids_for_users(users, options={})
11
- b = Watir::Browser.new
12
- requests = users.map { |user| get_requests_for_user user, b }.flatten
13
- b.close
14
- start_date = options[:start_date]
15
- requests.reject! { |r| start_date > r[:submitted_on] } if start_date
16
- requests.map { |r| r[:id] }.sort
17
- end
18
-
19
- private
20
-
21
- def get_requests_for_user(user, b)
22
- login_command = LoginCommand.new user[:username], user[:password], @navigator
23
- login_command.execute b
24
- @navigator.goto 'hd/ticket/euTicketFind.ssp', b
25
- get_all_requests b
26
- end
27
-
28
- def get_all_requests(b)
29
- b.button(value: 'Find').click
30
- rows = b.tables[1].rows.to_a
31
- rows.shift 2
32
-
33
- requests = rows.map do |row|
34
- cells = row.cells.to_a
35
- next if cells.empty?
36
- {
37
- id: cells[0].text.to_i,
38
- submitted_on: Time.parse(cells[2].text)
39
- }
40
- end
41
-
42
- requests.compact
43
- end
44
-
45
- end
46
-
1
+ require 'time'
2
+ require 'watir-webdriver'
3
+
4
+ class RequestFinder
5
+
6
+ def initialize(navigator)
7
+ @navigator = navigator
8
+ end
9
+
10
+ def get_request_ids_for_users(users, options={})
11
+ b = Watir::Browser.new
12
+ requests = users.map { |user| get_requests_for_user user, b }.flatten
13
+ b.close
14
+ start_date = options[:start_date]
15
+ requests.reject! { |r| start_date > r[:submitted_on] } if start_date
16
+ requests.map { |r| r[:id] }.sort
17
+ end
18
+
19
+ private
20
+
21
+ def get_requests_for_user(user, b)
22
+ login_command = LoginCommand.new user[:username], user[:password], @navigator
23
+ login_command.execute b
24
+ @navigator.goto 'hd/ticket/euTicketFind.ssp', b
25
+ get_all_requests b
26
+ end
27
+
28
+ def get_all_requests(b)
29
+ b.button(value: 'Find').click
30
+ rows = b.tables[1].rows.to_a
31
+ rows.shift 2
32
+
33
+ requests = rows.map do |row|
34
+ cells = row.cells.to_a
35
+ next if cells.empty?
36
+ {
37
+ id: cells[0].text.to_i,
38
+ submitted_on: Time.parse(cells[2].text)
39
+ }
40
+ end
41
+
42
+ requests.compact
43
+ end
44
+
45
+ end
46
+
@@ -1,25 +1,25 @@
1
- require 'time'
2
-
3
- class RequestHistoryTable
4
-
5
- def initialize(table)
6
- @table = table
7
- end
8
-
9
- def get_resolution_info
10
- r = {}
11
- rows = @table.trs.to_a
12
- resolved_row_index = rows.index { |r| r.text =~ /Resolved by/ }
13
-
14
- if resolved_row_index
15
- row_above = rows[resolved_row_index-1]
16
- captures = row_above.text.scan(/(.+) by (.+)/)[0]
17
- r[:resolved_on] = Time.parse captures[0]
18
- r[:resolved_by] = captures[1]
19
- end
20
-
21
- r[:history] = @table.text
22
- r
23
- end
24
-
1
+ require 'time'
2
+
3
+ class RequestHistoryTable
4
+
5
+ def initialize(table)
6
+ @table = table
7
+ end
8
+
9
+ def get_resolution_info
10
+ r = {}
11
+ rows = @table.trs.to_a
12
+ resolved_row_index = rows.index { |r| r.text =~ /Resolved by/ }
13
+
14
+ if resolved_row_index
15
+ row_above = rows[resolved_row_index-1]
16
+ captures = row_above.text.scan(/(.+) by (.+)/)[0]
17
+ r[:resolved_on] = Time.parse captures[0]
18
+ r[:resolved_by] = captures[1]
19
+ end
20
+
21
+ r[:history] = @table.text
22
+ r
23
+ end
24
+
25
25
  end
@@ -1,43 +1,43 @@
1
- class RequestInfoTable
2
-
3
- def initialize(table)
4
- @cells = table.tds.to_a
5
- end
6
-
7
- def get_request_info
8
- {
9
- title: cell(0),
10
- status: cell(2),
11
- service: cell(3),
12
- request_type: cell(4),
13
- time_spent: cell(5),
14
- priority: cell(7),
15
- deadline: time_cell(8),
16
- submitted_to: cell(10),
17
- submitted_by: cell(11),
18
- submitted_on: time_cell(12),
19
- assigned_to: cell(15),
20
- assigned_by: cell(16),
21
- assigned_on: time_cell(17),
22
- department_id: cell(19),
23
- closed_by: cell(20),
24
- closed_on: maybe_time_cell(21, 'None')
25
- }
26
- end
27
-
28
- private
29
-
30
- def maybe_time_cell(index, non_time_value)
31
- cell(index) { |s| s == non_time_value ? cell(index) : time_cell(index) }
32
- end
33
-
34
- def time_cell(index)
35
- cell(index) { |s| Time.parse s }
36
- end
37
-
38
- def cell(index)
39
- text = @cells[index].text
40
- block_given? ? yield(text) : text
41
- end
42
-
1
+ class RequestInfoTable
2
+
3
+ def initialize(table)
4
+ @cells = table.tds.to_a
5
+ end
6
+
7
+ def get_request_info
8
+ {
9
+ title: cell(0),
10
+ status: cell(2),
11
+ service: cell(3),
12
+ request_type: cell(4),
13
+ time_spent: cell(5),
14
+ priority: cell(7),
15
+ deadline: maybe_time_cell(8, 'None'),
16
+ submitted_to: cell(10),
17
+ submitted_by: cell(11),
18
+ submitted_on: time_cell(12),
19
+ assigned_to: cell(15),
20
+ assigned_by: cell(16),
21
+ assigned_on: time_cell(17),
22
+ department_id: cell(19),
23
+ closed_by: cell(20),
24
+ closed_on: maybe_time_cell(21, 'None')
25
+ }
26
+ end
27
+
28
+ private
29
+
30
+ def maybe_time_cell(index, non_time_value)
31
+ cell(index) { |s| s == non_time_value ? cell(index) : time_cell(index) }
32
+ end
33
+
34
+ def time_cell(index)
35
+ cell(index) { |s| Time.parse s }
36
+ end
37
+
38
+ def cell(index)
39
+ text = @cells[index].text
40
+ block_given? ? yield(text) : text
41
+ end
42
+
43
43
  end
@@ -1,32 +1,31 @@
1
- require_relative 'request_info_table'
2
- require_relative 'request_history_table'
3
-
4
- class RequestPage
5
-
6
- def initialize(b)
7
- @b = b
8
- end
9
-
10
- def get_request
11
- r = { id: request_id }
12
- return r if request_not_found?
13
-
14
- request_info_table = RequestInfoTable.new @b.tables[1]
15
- request_history_table = RequestHistoryTable.new @b.tables[3]
16
-
17
- r.merge! request_info_table.get_request_info
18
- r.merge! request_history_table.get_resolution_info
19
- r
20
- end
21
-
22
- private
23
-
24
- def request_id
25
- @b.text[/Request #(\d+)/, 1]
26
- end
27
-
28
- def request_not_found?
29
- @b.text =~ /Request #\d+ not found/
30
- end
31
-
1
+ require_relative 'request_info_table'
2
+ require_relative 'request_history_table'
3
+
4
+ class RequestPage
5
+
6
+ def initialize(b)
7
+ @b = b
8
+ end
9
+
10
+ def get_request
11
+ return if request_not_found?
12
+
13
+ request_info_table = RequestInfoTable.new @b.tables[1]
14
+ request_history_table = RequestHistoryTable.new @b.tables[3]
15
+
16
+ r.merge! request_info_table.get_request_info
17
+ r.merge! request_history_table.get_resolution_info
18
+ r
19
+ end
20
+
21
+ private
22
+
23
+ def request_id
24
+ @b.text[/Request #(\d+)/, 1]
25
+ end
26
+
27
+ def request_not_found?
28
+ @b.text =~ /Request #\d+ not found/
29
+ end
30
+
32
31
  end
@@ -1,56 +1,56 @@
1
- $stdout.sync = true
2
-
3
- require 'fileutils'
4
- require 'json'
5
- require_relative 'lib/login_command'
6
- require_relative 'lib/navigator'
7
- require_relative 'lib/parallel_request_scraper'
8
- require_relative 'lib/request_finder'
9
-
10
- class TrackItScraper
11
-
12
- def initialize(base_url, username, password, options={})
13
- navigator = Navigator.new base_url
14
- login_command = LoginCommand.new username, password, navigator
15
- threads = options[:threads] || 1
16
- @parallel_request_scraper = ParallelRequestScraper.new navigator, login_command, threads
17
- @request_finder = RequestFinder.new navigator
18
- @output_dir = options[:output_dir] || default_output_dir
19
- end
20
-
21
- def scrape_requests_for_users(users, options={})
22
- request_ids = @request_finder.get_request_ids_for_users users, options
23
- scrape_requests request_ids
24
- end
25
-
26
- def scrape_requests_in_range(from_id, to_id)
27
- request_ids = (from_id..to_id).to_a
28
- scrape_requests request_ids
29
- end
30
-
31
- private
32
-
33
- def scrape_requests(request_ids)
34
- FileUtils.mkdir_p @output_dir
35
- request_ids = request_ids - existing_request_ids
36
- @parallel_request_scraper.scrape(request_ids) { |request| write_request_file request }
37
- end
38
-
39
- def existing_request_ids
40
- Dir["#@output_dir/*"].map { |f| File.basename(f, '.json').to_i }
41
- end
42
-
43
- def write_request_file(request)
44
- File.write request_file_path(request), request.to_json
45
- print '.'
46
- end
47
-
48
- def request_file_path(request)
49
- File.join @output_dir, "#{request[:id]}.json"
50
- end
51
-
52
- def default_output_dir
53
- File.join 'output', Time.now.strftime('%Y-%m-%d-%H-%M')
54
- end
55
-
1
+ $stdout.sync = true
2
+
3
+ require 'fileutils'
4
+ require 'json'
5
+ require_relative 'lib/login_command'
6
+ require_relative 'lib/navigator'
7
+ require_relative 'lib/parallel_request_scraper'
8
+ require_relative 'lib/request_finder'
9
+
10
+ class TrackItScraper
11
+
12
+ def initialize(base_url, username, password, options={})
13
+ navigator = Navigator.new base_url
14
+ login_command = LoginCommand.new username, password, navigator
15
+ threads = options[:threads] || 1
16
+ @parallel_request_scraper = ParallelRequestScraper.new navigator, login_command, threads
17
+ @request_finder = RequestFinder.new navigator
18
+ @output_dir = options[:output_dir] || default_output_dir
19
+ end
20
+
21
+ def scrape_requests_for_users(users, options={})
22
+ request_ids = @request_finder.get_request_ids_for_users users, options
23
+ scrape_requests request_ids
24
+ end
25
+
26
+ def scrape_requests_in_range(from_id, to_id)
27
+ request_ids = (from_id..to_id).to_a
28
+ scrape_requests request_ids
29
+ end
30
+
31
+ private
32
+
33
+ def scrape_requests(request_ids)
34
+ FileUtils.mkdir_p @output_dir
35
+ request_ids = request_ids - existing_request_ids
36
+ @parallel_request_scraper.scrape(request_ids) { |request| write_request_file request }
37
+ end
38
+
39
+ def existing_request_ids
40
+ Dir["#@output_dir/**/*.json"].map { |f| File.basename(f, '.*').to_i }
41
+ end
42
+
43
+ def write_request_file(request)
44
+ File.write request_file_path(request), request.to_json
45
+ print '.'
46
+ end
47
+
48
+ def request_file_path(request)
49
+ File.join @output_dir, "#{request[:id]}.json"
50
+ end
51
+
52
+ def default_output_dir
53
+ File.join 'output', Time.now.strftime('%Y-%m-%d-%H-%M')
54
+ end
55
+
56
56
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: trackit_scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.0
4
+ version: 2.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-07-12 00:00:00.000000000 Z
12
+ date: 2013-12-06 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: json
16
- requirement: !ruby/object:Gem::Requirement
16
+ requirement: &70192723069620 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,15 +21,10 @@ dependencies:
21
21
  version: 1.7.7
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
- requirements:
27
- - - ~>
28
- - !ruby/object:Gem::Version
29
- version: 1.7.7
24
+ version_requirements: *70192723069620
30
25
  - !ruby/object:Gem::Dependency
31
26
  name: watir-webdriver
32
- requirement: !ruby/object:Gem::Requirement
27
+ requirement: &70192723069140 !ruby/object:Gem::Requirement
33
28
  none: false
34
29
  requirements:
35
30
  - - ~>
@@ -37,12 +32,7 @@ dependencies:
37
32
  version: 0.6.4
38
33
  type: :runtime
39
34
  prerelease: false
40
- version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
- requirements:
43
- - - ~>
44
- - !ruby/object:Gem::Version
45
- version: 0.6.4
35
+ version_requirements: *70192723069140
46
36
  description:
47
37
  email: matthew-github@matthewriley.name
48
38
  executables: []
@@ -78,7 +68,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
78
68
  version: '0'
79
69
  requirements: []
80
70
  rubyforge_project:
81
- rubygems_version: 1.8.24
71
+ rubygems_version: 1.8.15
82
72
  signing_key:
83
73
  specification_version: 3
84
74
  summary: Screen scrapes data from the Track-It help desk web application.