trackit_scraper 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,22 @@
# Logs a user on to the Track-It help desk web application through a
# browser session.
class LoginCommand
  # username  - value typed into the 'user_id' text field.
  # password  - value typed into the 'user_pwd' text field.
  # navigator - object responding to goto(relative_url, browser).
  def initialize(username, password, navigator)
    @navigator = navigator
    @username = username
    @password = password
  end

  # Opens the log-on page ('hd/index.ssp') in browser +b+, fills in the
  # stored credentials and clicks the 'Log on' button.
  #
  # Returns whatever clicking the button returns.
  def execute(b)
    @navigator.goto('hd/index.ssp', b)
    fill_in(b, 'user_id', @username)
    fill_in(b, 'user_pwd', @password)
    b.button(value: 'Log on').click
  end

  private

  # Sets the text field called +field_name+ to +value+.
  def fill_in(browser, field_name, value)
    browser.text_field(name: field_name).set(value)
  end
end
@@ -0,0 +1,12 @@
# Points a browser at pages addressed relative to a fixed base URL.
class Navigator
  # base_url - prefix that every relative URL is joined onto.
  def initialize(base_url)
    @base_url = base_url
  end

  # Joins +relative_url+ onto the base URL (File.join semantics, so exactly
  # one '/' separates the two parts) and tells browser +b+ to load it.
  #
  # Returns whatever b.goto returns.
  def goto(relative_url, b)
    b.goto(File.join(@base_url, relative_url))
  end
end
@@ -0,0 +1,45 @@
1
+ require 'thread'
2
+ require 'watir-webdriver'
3
+ require_relative 'request_page'
4
+
# Scrapes a list of request pages using several browser sessions, one per
# worker thread.
class ParallelRequestScraper
  # navigator     - object responding to goto(relative_url, browser).
  # login_command - object responding to execute(browser); run once for
  #                 every browser a worker opens.
  # threads       - maximum number of worker threads (and browsers).
  def initialize(navigator, login_command, threads)
    @navigator = navigator
    @login_command = login_command
    @threads = threads
  end

  # Scrapes every id in +request_ids+ and yields each scraped request to the
  # given block. The block is called from the worker threads, so it may run
  # concurrently with itself.
  #
  # The caller's array is not modified; ids are taken from the end of a copy.
  # An exception raised in a worker is re-raised here when it is joined.
  #
  # Returns the Array of joined worker threads.
  def scrape(request_ids, &block)
    @request_ids = request_ids.clone
    @semaphore = Mutex.new
    # Never start more browsers than there are ids to scrape.
    worker_count = [@threads, @request_ids.size].min
    worker_count.times.map { Thread.new { scrape_requests(&block) } }.each(&:join)
  end

  private

  # Worker body: opens a browser, logs in, then scrapes ids until none are
  # left. The browser is closed even when login, scraping or the caller's
  # block raises.
  def scrape_requests
    b = Watir::Browser.new
    begin
      @login_command.execute b
      while (request = scrape_next_request(b))
        yield request
      end
    ensure
      b.close
    end
  end

  # Loads the page of the next pending id in browser +b+ and returns the
  # scraped request, or nil when no ids are left.
  def scrape_next_request(b)
    id = next_request_id
    return unless id
    @navigator.goto "hd/ticket/euTicketView.ssp?ticket_id=#{id}&log=show", b
    RequestPage.new(b).get_request
  end

  # Removes and returns the next pending id (nil when exhausted). The mutex
  # makes sure no two workers receive the same id.
  def next_request_id
    @semaphore.synchronize { @request_ids.pop }
  end
end
@@ -0,0 +1,31 @@
1
+ require 'watir-webdriver'
2
+
# Finds the ids of the requests listed for a set of users.
class RequestFinder
  # navigator - object responding to goto(relative_url, browser).
  def initialize(navigator)
    @navigator = navigator
  end

  # Logs in as each user in turn (one shared browser) and collects the ids of
  # all requests found for them.
  #
  # users - Array of Hashes with :username and :password keys.
  #
  # Returns a sorted Array of Integer ids without duplicates, so a request
  # listed for several users is only reported once. The browser is closed
  # even if logging in or searching raises.
  def get_request_ids_for_users(users)
    b = Watir::Browser.new
    begin
      users.flat_map { |user| get_request_ids_for_user user, b }.uniq.sort
    ensure
      b.close
    end
  end

  private

  # Logs in as +user+ in browser +b+, opens the find page and returns the ids
  # found there.
  def get_request_ids_for_user(user, b)
    login_command = LoginCommand.new user[:username], user[:password], @navigator
    login_command.execute b
    @navigator.goto 'hd/ticket/euTicketFind.ssp', b
    get_all_request_ids b
  end

  # Clicks 'Find' and extracts the numeric ticket_id from every link whose
  # href contains one.
  def get_all_request_ids(b)
    b.button(value: 'Find').click
    b.links(href: /ticket_id=/).to_a.map { |link| link.href[/ticket_id=(\d+)/, 1].to_i }
  end
end
31
+
@@ -0,0 +1,25 @@
1
+ require 'time'
2
+
# Reads resolution details out of a request's history table.
class RequestHistoryTable
  # table - element responding to trs (rows, each responding to text) and
  #         to text.
  def initialize(table)
    @table = table
  end

  # Returns a Hash that always contains :history (the table's full text).
  # When a row matching "Resolved by" exists and the row directly above it
  # reads "<time> by <name>", the Hash also has :resolved_on (Time) and
  # :resolved_by (String), inserted before :history.
  #
  # Raises ArgumentError (from Time.parse) if the "<time>" part cannot be
  # parsed.
  def get_resolution_info
    info = {}
    resolution = find_resolution

    if resolution
      info[:resolved_on] = Time.parse resolution[1]
      info[:resolved_by] = resolution[2]
    end

    info[:history] = @table.text
    info
  end

  private

  # Returns the MatchData of the "<time> by <name>" row above the first
  # "Resolved by" row, or nil when there is no such row.
  def find_resolution
    rows = @table.trs.to_a
    resolved_row_index = rows.index { |row| row.text =~ /Resolved by/ }
    # Index 0 has no row above it; rows[-1] would silently pick the LAST row.
    return unless resolved_row_index && resolved_row_index > 0
    rows[resolved_row_index - 1].text.match(/(.+) by (.+)/)
  end
end
@@ -0,0 +1,43 @@
# Time.parse comes from the 'time' standard library; require it here so this
# file does not depend on another file having loaded it first.
require 'time'

# Reads the fields of a request out of its info table by cell position.
class RequestInfoTable
  # table - element responding to tds; each cell must respond to text.
  def initialize(table)
    @cells = table.tds.to_a
  end

  # Returns a Hash of the request's fields. :deadline, :submitted_on and
  # :assigned_on are Time values; :closed_on is a Time, or the String 'None'
  # when the cell contains exactly that. All other values are the cells' text.
  #
  # Raises ArgumentError (from Time.parse) when a time cell cannot be parsed.
  def get_request_info
    {
      title: cell(0),
      status: cell(2),
      service: cell(3),
      request_type: cell(4),
      time_spent: cell(5),
      priority: cell(7),
      deadline: time_cell(8),
      submitted_to: cell(10),
      submitted_by: cell(11),
      submitted_on: time_cell(12),
      assigned_to: cell(15),
      assigned_by: cell(16),
      assigned_on: time_cell(17),
      department_id: cell(19),
      closed_by: cell(20),
      closed_on: maybe_time_cell(21, 'None')
    }
  end

  private

  # Returns the cell's text unchanged when it equals +non_time_value+,
  # otherwise the text parsed as a Time. The text is read from the cell once
  # and reused, instead of being fetched again for each branch.
  def maybe_time_cell(index, non_time_value)
    cell(index) { |s| s == non_time_value ? s : Time.parse(s) }
  end

  # Returns the cell's text parsed as a Time.
  def time_cell(index)
    cell(index) { |s| Time.parse s }
  end

  # Returns the text of the cell at +index+, passed through the block when
  # one is given.
  def cell(index)
    text = @cells[index].text
    block_given? ? yield(text) : text
  end
end
@@ -0,0 +1,25 @@
1
+ require_relative 'request_info_table'
2
+ require_relative 'request_history_table'
3
+
# Reads one request from the request page currently loaded in a browser.
class RequestPage
  # b - browser positioned on a request page; must respond to tables and text.
  def initialize(b)
    @b = b
  end

  # Builds the request Hash: :id (the digits following "Request #" in the
  # page text, as a String) merged with the fields from the page's second
  # table (info) and then those from its fourth table (history). Later
  # merges overwrite earlier keys.
  def get_request
    info_table = RequestInfoTable.new(@b.tables[1])
    history_table = RequestHistoryTable.new(@b.tables[3])

    { id: @b.text[/Request #(\d+)/, 1] }
      .merge!(info_table.get_request_info)
      .merge!(history_table.get_resolution_info)
  end
end
@@ -0,0 +1,51 @@
1
+ $stdout.sync = true
2
+
3
+ require 'fileutils'
4
+ require 'json'
5
+ require_relative 'lib/login_command'
6
+ require_relative 'lib/navigator'
7
+ require_relative 'lib/parallel_request_scraper'
8
+ require_relative 'lib/request_finder'
9
+
# Entry point of the scraper: finds request ids and writes every scraped
# request to its own JSON file.
class TrackIt
  # base_url, username, password - where and as whom to log in.
  # options - Hash; :threads is passed to the parallel scraper (default 1),
  #           :output_dir is where the JSON files go (default: a directory
  #           under 'output' named after the current time).
  def initialize(base_url, username, password, options={})
    navigator = Navigator.new(base_url)
    login_command = LoginCommand.new(username, password, navigator)
    thread_count = options[:threads] || 1
    @parallel_request_scraper = ParallelRequestScraper.new(navigator, login_command, thread_count)
    @request_finder = RequestFinder.new(navigator)
    @output_dir = options[:output_dir] || default_output_dir
  end

  # Scrapes every request the request finder reports for +users+.
  def scrape_requests_for_users(users)
    scrape_requests(@request_finder.get_request_ids_for_users(users))
  end

  # Scrapes the requests with ids from_id through to_id (inclusive).
  def scrape_requests_in_range(from_id, to_id)
    scrape_requests((from_id..to_id).to_a)
  end

  private

  # Creates the output directory if needed, then writes each scraped request
  # to "<output_dir>/<id>.json" and prints a '.' as a progress marker.
  def scrape_requests(request_ids)
    FileUtils.mkdir_p(@output_dir)
    @parallel_request_scraper.scrape(request_ids) do |request|
      path = File.join(@output_dir, "#{request[:id]}.json")
      File.write(path, request.to_json)
      print '.'
    end
  end

  # 'output/<year>-<month>-<day>-<hour>-<minute>' for the current time.
  def default_output_dir
    timestamp = Time.now.strftime('%Y-%m-%d-%H-%M')
    File.join('output', timestamp)
  end
end
@@ -0,0 +1 @@
1
+ require_relative '2003.10.1/trackit'
metadata ADDED
@@ -0,0 +1,75 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: trackit_scraper
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Matthew Riley
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-07-06 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: json
16
+ requirement: &70292605876980 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 1.7.7
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *70292605876980
25
+ - !ruby/object:Gem::Dependency
26
+ name: watir-webdriver
27
+ requirement: &70292605876500 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ~>
31
+ - !ruby/object:Gem::Version
32
+ version: 0.6.4
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *70292605876500
36
+ description:
37
+ email: matthew-github@matthewriley.name
38
+ executables: []
39
+ extensions: []
40
+ extra_rdoc_files: []
41
+ files:
42
+ - lib/trackit_scraper/2003.10.1/lib/login_command.rb
43
+ - lib/trackit_scraper/2003.10.1/lib/navigator.rb
44
+ - lib/trackit_scraper/2003.10.1/lib/parallel_request_scraper.rb
45
+ - lib/trackit_scraper/2003.10.1/lib/request_finder.rb
46
+ - lib/trackit_scraper/2003.10.1/lib/request_history_table.rb
47
+ - lib/trackit_scraper/2003.10.1/lib/request_info_table.rb
48
+ - lib/trackit_scraper/2003.10.1/lib/request_page.rb
49
+ - lib/trackit_scraper/2003.10.1/trackit.rb
50
+ - lib/trackit_scraper/2003.10.1.rb
51
+ homepage:
52
+ licenses: []
53
+ post_install_message:
54
+ rdoc_options: []
55
+ require_paths:
56
+ - lib
57
+ required_ruby_version: !ruby/object:Gem::Requirement
58
+ none: false
59
+ requirements:
60
+ - - ! '>='
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
63
+ required_rubygems_version: !ruby/object:Gem::Requirement
64
+ none: false
65
+ requirements:
66
+ - - ! '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ requirements: []
70
+ rubyforge_project:
71
+ rubygems_version: 1.8.15
72
+ signing_key:
73
+ specification_version: 3
74
+ summary: Screen scrapes data from the Track-It help desk web application.
75
+ test_files: []