trackit_scraper 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,22 +1,22 @@
1
- class LoginCommand
2
-
3
- def initialize(username, password, navigator)
4
- @username = username
5
- @password = password
6
- @navigator = navigator
7
- end
8
-
9
- def execute(b)
10
- @navigator.goto 'hd/index.ssp', b
11
- login b
12
- end
13
-
14
- private
15
-
16
- def login(b)
17
- b.text_field(name: 'user_id').set @username
18
- b.text_field(name: 'user_pwd').set @password
19
- b.button(value: 'Log on').click
20
- end
21
-
1
+ class LoginCommand
2
+
3
+ def initialize(username, password, navigator)
4
+ @username = username
5
+ @password = password
6
+ @navigator = navigator
7
+ end
8
+
9
+ def execute(b)
10
+ @navigator.goto 'hd/index.ssp', b
11
+ login b
12
+ end
13
+
14
+ private
15
+
16
+ def login(b)
17
+ b.text_field(name: 'user_id').set @username
18
+ b.text_field(name: 'user_pwd').set @password
19
+ b.button(value: 'Log on').click
20
+ end
21
+
22
22
  end
@@ -1,12 +1,12 @@
1
- class Navigator
2
-
3
- def initialize(base_url)
4
- @base_url = base_url
5
- end
6
-
7
- def goto(relative_url, b)
8
- url = File.join @base_url, relative_url
9
- b.goto url
10
- end
11
-
1
+ class Navigator
2
+
3
+ def initialize(base_url)
4
+ @base_url = base_url
5
+ end
6
+
7
+ def goto(relative_url, b)
8
+ url = File.join @base_url, relative_url
9
+ b.goto url
10
+ end
11
+
12
12
  end
@@ -1,45 +1,45 @@
1
- require 'thread'
2
- require 'watir-webdriver'
3
- require_relative 'request_page'
4
-
5
- class ParallelRequestScraper
6
-
7
- def initialize(navigator, login_command, threads)
8
- @navigator = navigator
9
- @login_command = login_command
10
- @threads = threads
11
- end
12
-
13
- def scrape(request_ids, &block)
14
- @request_ids = request_ids.clone
15
- @semaphore = Mutex.new
16
- @threads.times.map { Thread.new { scrape_requests &block } }.each { |t| t.join }
17
- end
18
-
19
- private
20
-
21
- def scrape_requests
22
- b = Watir::Browser.new
23
- @login_command.execute b
24
-
25
- until @request_ids.empty?
26
- request = scrape_next_request b
27
- yield request
28
- end
29
-
30
- b.close
31
- end
32
-
33
- def scrape_next_request(b)
34
- id = next_request_id
35
- return unless id
36
- @navigator.goto "hd/ticket/euTicketView.ssp?ticket_id=#{id}&log=show", b
37
- request_page = RequestPage.new b
38
- request_page.get_request
39
- end
40
-
41
- def next_request_id
42
- @semaphore.synchronize { return @request_ids.pop }
43
- end
44
-
1
+ require 'thread'
2
+ require 'watir-webdriver'
3
+ require_relative 'request_page'
4
+
5
+ class ParallelRequestScraper
6
+
7
+ def initialize(navigator, login_command, threads)
8
+ @navigator = navigator
9
+ @login_command = login_command
10
+ @threads = threads
11
+ end
12
+
13
+ def scrape(request_ids, &block)
14
+ @request_ids = request_ids.clone
15
+ @semaphore = Mutex.new
16
+ @threads.times.map { Thread.new { scrape_requests &block } }.each { |t| t.join }
17
+ end
18
+
19
+ private
20
+
21
+ def scrape_requests
22
+ b = Watir::Browser.new
23
+ @login_command.execute b
24
+
25
+ until @request_ids.empty?
26
+ request = scrape_next_request b
27
+ yield request if request
28
+ end
29
+
30
+ b.close
31
+ end
32
+
33
+ def scrape_next_request(b)
34
+ id = next_request_id
35
+ return unless id
36
+ @navigator.goto "hd/ticket/euTicketView.ssp?ticket_id=#{id}&log=show", b
37
+ request_page = RequestPage.new b
38
+ request_page.get_request
39
+ end
40
+
41
+ def next_request_id
42
+ @semaphore.synchronize { return @request_ids.pop }
43
+ end
44
+
45
45
  end
@@ -1,46 +1,46 @@
1
- require 'time'
2
- require 'watir-webdriver'
3
-
4
- class RequestFinder
5
-
6
- def initialize(navigator)
7
- @navigator = navigator
8
- end
9
-
10
- def get_request_ids_for_users(users, options={})
11
- b = Watir::Browser.new
12
- requests = users.map { |user| get_requests_for_user user, b }.flatten
13
- b.close
14
- start_date = options[:start_date]
15
- requests.reject! { |r| start_date > r[:submitted_on] } if start_date
16
- requests.map { |r| r[:id] }.sort
17
- end
18
-
19
- private
20
-
21
- def get_requests_for_user(user, b)
22
- login_command = LoginCommand.new user[:username], user[:password], @navigator
23
- login_command.execute b
24
- @navigator.goto 'hd/ticket/euTicketFind.ssp', b
25
- get_all_requests b
26
- end
27
-
28
- def get_all_requests(b)
29
- b.button(value: 'Find').click
30
- rows = b.tables[1].rows.to_a
31
- rows.shift 2
32
-
33
- requests = rows.map do |row|
34
- cells = row.cells.to_a
35
- next if cells.empty?
36
- {
37
- id: cells[0].text.to_i,
38
- submitted_on: Time.parse(cells[2].text)
39
- }
40
- end
41
-
42
- requests.compact
43
- end
44
-
45
- end
46
-
1
+ require 'time'
2
+ require 'watir-webdriver'
3
+
4
+ class RequestFinder
5
+
6
+ def initialize(navigator)
7
+ @navigator = navigator
8
+ end
9
+
10
+ def get_request_ids_for_users(users, options={})
11
+ b = Watir::Browser.new
12
+ requests = users.map { |user| get_requests_for_user user, b }.flatten
13
+ b.close
14
+ start_date = options[:start_date]
15
+ requests.reject! { |r| start_date > r[:submitted_on] } if start_date
16
+ requests.map { |r| r[:id] }.sort
17
+ end
18
+
19
+ private
20
+
21
+ def get_requests_for_user(user, b)
22
+ login_command = LoginCommand.new user[:username], user[:password], @navigator
23
+ login_command.execute b
24
+ @navigator.goto 'hd/ticket/euTicketFind.ssp', b
25
+ get_all_requests b
26
+ end
27
+
28
+ def get_all_requests(b)
29
+ b.button(value: 'Find').click
30
+ rows = b.tables[1].rows.to_a
31
+ rows.shift 2
32
+
33
+ requests = rows.map do |row|
34
+ cells = row.cells.to_a
35
+ next if cells.empty?
36
+ {
37
+ id: cells[0].text.to_i,
38
+ submitted_on: Time.parse(cells[2].text)
39
+ }
40
+ end
41
+
42
+ requests.compact
43
+ end
44
+
45
+ end
46
+
@@ -1,25 +1,25 @@
1
- require 'time'
2
-
3
- class RequestHistoryTable
4
-
5
- def initialize(table)
6
- @table = table
7
- end
8
-
9
- def get_resolution_info
10
- r = {}
11
- rows = @table.trs.to_a
12
- resolved_row_index = rows.index { |r| r.text =~ /Resolved by/ }
13
-
14
- if resolved_row_index
15
- row_above = rows[resolved_row_index-1]
16
- captures = row_above.text.scan(/(.+) by (.+)/)[0]
17
- r[:resolved_on] = Time.parse captures[0]
18
- r[:resolved_by] = captures[1]
19
- end
20
-
21
- r[:history] = @table.text
22
- r
23
- end
24
-
1
+ require 'time'
2
+
3
+ class RequestHistoryTable
4
+
5
+ def initialize(table)
6
+ @table = table
7
+ end
8
+
9
+ def get_resolution_info
10
+ r = {}
11
+ rows = @table.trs.to_a
12
+ resolved_row_index = rows.index { |r| r.text =~ /Resolved by/ }
13
+
14
+ if resolved_row_index
15
+ row_above = rows[resolved_row_index-1]
16
+ captures = row_above.text.scan(/(.+) by (.+)/)[0]
17
+ r[:resolved_on] = Time.parse captures[0]
18
+ r[:resolved_by] = captures[1]
19
+ end
20
+
21
+ r[:history] = @table.text
22
+ r
23
+ end
24
+
25
25
  end
@@ -1,43 +1,43 @@
1
- class RequestInfoTable
2
-
3
- def initialize(table)
4
- @cells = table.tds.to_a
5
- end
6
-
7
- def get_request_info
8
- {
9
- title: cell(0),
10
- status: cell(2),
11
- service: cell(3),
12
- request_type: cell(4),
13
- time_spent: cell(5),
14
- priority: cell(7),
15
- deadline: time_cell(8),
16
- submitted_to: cell(10),
17
- submitted_by: cell(11),
18
- submitted_on: time_cell(12),
19
- assigned_to: cell(15),
20
- assigned_by: cell(16),
21
- assigned_on: time_cell(17),
22
- department_id: cell(19),
23
- closed_by: cell(20),
24
- closed_on: maybe_time_cell(21, 'None')
25
- }
26
- end
27
-
28
- private
29
-
30
- def maybe_time_cell(index, non_time_value)
31
- cell(index) { |s| s == non_time_value ? cell(index) : time_cell(index) }
32
- end
33
-
34
- def time_cell(index)
35
- cell(index) { |s| Time.parse s }
36
- end
37
-
38
- def cell(index)
39
- text = @cells[index].text
40
- block_given? ? yield(text) : text
41
- end
42
-
1
+ class RequestInfoTable
2
+
3
+ def initialize(table)
4
+ @cells = table.tds.to_a
5
+ end
6
+
7
+ def get_request_info
8
+ {
9
+ title: cell(0),
10
+ status: cell(2),
11
+ service: cell(3),
12
+ request_type: cell(4),
13
+ time_spent: cell(5),
14
+ priority: cell(7),
15
+ deadline: maybe_time_cell(8, 'None'),
16
+ submitted_to: cell(10),
17
+ submitted_by: cell(11),
18
+ submitted_on: time_cell(12),
19
+ assigned_to: cell(15),
20
+ assigned_by: cell(16),
21
+ assigned_on: time_cell(17),
22
+ department_id: cell(19),
23
+ closed_by: cell(20),
24
+ closed_on: maybe_time_cell(21, 'None')
25
+ }
26
+ end
27
+
28
+ private
29
+
30
+ def maybe_time_cell(index, non_time_value)
31
+ cell(index) { |s| s == non_time_value ? cell(index) : time_cell(index) }
32
+ end
33
+
34
+ def time_cell(index)
35
+ cell(index) { |s| Time.parse s }
36
+ end
37
+
38
+ def cell(index)
39
+ text = @cells[index].text
40
+ block_given? ? yield(text) : text
41
+ end
42
+
43
43
  end
@@ -1,32 +1,31 @@
1
- require_relative 'request_info_table'
2
- require_relative 'request_history_table'
3
-
4
- class RequestPage
5
-
6
- def initialize(b)
7
- @b = b
8
- end
9
-
10
- def get_request
11
- r = { id: request_id }
12
- return r if request_not_found?
13
-
14
- request_info_table = RequestInfoTable.new @b.tables[1]
15
- request_history_table = RequestHistoryTable.new @b.tables[3]
16
-
17
- r.merge! request_info_table.get_request_info
18
- r.merge! request_history_table.get_resolution_info
19
- r
20
- end
21
-
22
- private
23
-
24
- def request_id
25
- @b.text[/Request #(\d+)/, 1]
26
- end
27
-
28
- def request_not_found?
29
- @b.text =~ /Request #\d+ not found/
30
- end
31
-
1
+ require_relative 'request_info_table'
2
+ require_relative 'request_history_table'
3
+
4
+ class RequestPage
5
+
6
+ def initialize(b)
7
+ @b = b
8
+ end
9
+
10
+ def get_request
11
+ return if request_not_found?
12
+
13
+ request_info_table = RequestInfoTable.new @b.tables[1]
14
+ request_history_table = RequestHistoryTable.new @b.tables[3]
15
+
16
+ r.merge! request_info_table.get_request_info
17
+ r.merge! request_history_table.get_resolution_info
18
+ r
19
+ end
20
+
21
+ private
22
+
23
+ def request_id
24
+ @b.text[/Request #(\d+)/, 1]
25
+ end
26
+
27
+ def request_not_found?
28
+ @b.text =~ /Request #\d+ not found/
29
+ end
30
+
32
31
  end
@@ -1,56 +1,56 @@
1
- $stdout.sync = true
2
-
3
- require 'fileutils'
4
- require 'json'
5
- require_relative 'lib/login_command'
6
- require_relative 'lib/navigator'
7
- require_relative 'lib/parallel_request_scraper'
8
- require_relative 'lib/request_finder'
9
-
10
- class TrackItScraper
11
-
12
- def initialize(base_url, username, password, options={})
13
- navigator = Navigator.new base_url
14
- login_command = LoginCommand.new username, password, navigator
15
- threads = options[:threads] || 1
16
- @parallel_request_scraper = ParallelRequestScraper.new navigator, login_command, threads
17
- @request_finder = RequestFinder.new navigator
18
- @output_dir = options[:output_dir] || default_output_dir
19
- end
20
-
21
- def scrape_requests_for_users(users, options={})
22
- request_ids = @request_finder.get_request_ids_for_users users, options
23
- scrape_requests request_ids
24
- end
25
-
26
- def scrape_requests_in_range(from_id, to_id)
27
- request_ids = (from_id..to_id).to_a
28
- scrape_requests request_ids
29
- end
30
-
31
- private
32
-
33
- def scrape_requests(request_ids)
34
- FileUtils.mkdir_p @output_dir
35
- request_ids = request_ids - existing_request_ids
36
- @parallel_request_scraper.scrape(request_ids) { |request| write_request_file request }
37
- end
38
-
39
- def existing_request_ids
40
- Dir["#@output_dir/*"].map { |f| File.basename(f, '.json').to_i }
41
- end
42
-
43
- def write_request_file(request)
44
- File.write request_file_path(request), request.to_json
45
- print '.'
46
- end
47
-
48
- def request_file_path(request)
49
- File.join @output_dir, "#{request[:id]}.json"
50
- end
51
-
52
- def default_output_dir
53
- File.join 'output', Time.now.strftime('%Y-%m-%d-%H-%M')
54
- end
55
-
1
+ $stdout.sync = true
2
+
3
+ require 'fileutils'
4
+ require 'json'
5
+ require_relative 'lib/login_command'
6
+ require_relative 'lib/navigator'
7
+ require_relative 'lib/parallel_request_scraper'
8
+ require_relative 'lib/request_finder'
9
+
10
+ class TrackItScraper
11
+
12
+ def initialize(base_url, username, password, options={})
13
+ navigator = Navigator.new base_url
14
+ login_command = LoginCommand.new username, password, navigator
15
+ threads = options[:threads] || 1
16
+ @parallel_request_scraper = ParallelRequestScraper.new navigator, login_command, threads
17
+ @request_finder = RequestFinder.new navigator
18
+ @output_dir = options[:output_dir] || default_output_dir
19
+ end
20
+
21
+ def scrape_requests_for_users(users, options={})
22
+ request_ids = @request_finder.get_request_ids_for_users users, options
23
+ scrape_requests request_ids
24
+ end
25
+
26
+ def scrape_requests_in_range(from_id, to_id)
27
+ request_ids = (from_id..to_id).to_a
28
+ scrape_requests request_ids
29
+ end
30
+
31
+ private
32
+
33
+ def scrape_requests(request_ids)
34
+ FileUtils.mkdir_p @output_dir
35
+ request_ids = request_ids - existing_request_ids
36
+ @parallel_request_scraper.scrape(request_ids) { |request| write_request_file request }
37
+ end
38
+
39
+ def existing_request_ids
40
+ Dir["#@output_dir/**/*.json"].map { |f| File.basename(f, '.*').to_i }
41
+ end
42
+
43
+ def write_request_file(request)
44
+ File.write request_file_path(request), request.to_json
45
+ print '.'
46
+ end
47
+
48
+ def request_file_path(request)
49
+ File.join @output_dir, "#{request[:id]}.json"
50
+ end
51
+
52
+ def default_output_dir
53
+ File.join 'output', Time.now.strftime('%Y-%m-%d-%H-%M')
54
+ end
55
+
56
56
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: trackit_scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.0
4
+ version: 2.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-07-12 00:00:00.000000000 Z
12
+ date: 2013-12-06 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: json
16
- requirement: !ruby/object:Gem::Requirement
16
+ requirement: &70192723069620 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,15 +21,10 @@ dependencies:
21
21
  version: 1.7.7
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
- requirements:
27
- - - ~>
28
- - !ruby/object:Gem::Version
29
- version: 1.7.7
24
+ version_requirements: *70192723069620
30
25
  - !ruby/object:Gem::Dependency
31
26
  name: watir-webdriver
32
- requirement: !ruby/object:Gem::Requirement
27
+ requirement: &70192723069140 !ruby/object:Gem::Requirement
33
28
  none: false
34
29
  requirements:
35
30
  - - ~>
@@ -37,12 +32,7 @@ dependencies:
37
32
  version: 0.6.4
38
33
  type: :runtime
39
34
  prerelease: false
40
- version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
- requirements:
43
- - - ~>
44
- - !ruby/object:Gem::Version
45
- version: 0.6.4
35
+ version_requirements: *70192723069140
46
36
  description:
47
37
  email: matthew-github@matthewriley.name
48
38
  executables: []
@@ -78,7 +68,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
78
68
  version: '0'
79
69
  requirements: []
80
70
  rubyforge_project:
81
- rubygems_version: 1.8.24
71
+ rubygems_version: 1.8.15
82
72
  signing_key:
83
73
  specification_version: 3
84
74
  summary: Screen scrapes data from the Track-It help desk web application.