trackit_scraper 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,22 @@
1
# Logs a browser session in to the Track-It help desk through its logon form.
class LoginCommand

  # username  - value typed into the 'user_id' text field.
  # password  - value typed into the 'user_pwd' text field.
  # navigator - object responding to goto(relative_url, browser).
  def initialize(username, password, navigator)
    @navigator = navigator
    @username = username
    @password = password
  end

  # Opens 'hd/index.ssp' in browser b through the navigator, fills in the
  # credentials and clicks the 'Log on' button.
  #
  # Returns whatever the button click returns.
  def execute(b)
    @navigator.goto('hd/index.ssp', b)
    fill_in_credentials(b)
    b.button(value: 'Log on').click
  end

  private

  # Types the user name first and the password second, each into the text
  # field looked up by its name attribute.
  def fill_in_credentials(b)
    { 'user_id' => @username, 'user_pwd' => @password }.each do |field_name, value|
      b.text_field(name: field_name).set(value)
    end
  end

end
@@ -0,0 +1,12 @@
1
# Points a browser at application pages addressed relative to a base URL.
class Navigator

  # base_url - prefix that every relative URL is joined onto.
  def initialize(base_url)
    @base_url = base_url
  end

  # Joins relative_url onto the base URL with File.join (so a doubled slash at
  # the seam is collapsed) and tells browser b to go there.
  #
  # Returns whatever b.goto returns.
  def goto(relative_url, b)
    b.goto(File.join(@base_url, relative_url))
  end

end
@@ -0,0 +1,45 @@
1
+ require 'thread'
2
+ require 'watir-webdriver'
3
+ require_relative 'request_page'
4
+
5
# Scrapes request pages with several worker threads, each of which drives its
# own logged-in Watir browser.
class ParallelRequestScraper

  # navigator     - object responding to goto(relative_url, browser).
  # login_command - object responding to execute(browser).
  # threads       - number of worker threads (and browsers) to start.
  def initialize(navigator, login_command, threads)
    @navigator = navigator
    @login_command = login_command
    @threads = threads
  end

  # Scrapes every id in request_ids and yields each scraped request to the
  # given block. The block is called from the worker threads, so with more
  # than one thread it may be running in several threads at once.
  #
  # request_ids is not modified. Blocks until all workers have finished; an
  # exception raised inside a worker is re-raised here by Thread#join.
  #
  # Returns the array of (finished) worker threads.
  def scrape(request_ids, &block)
    # dup rather than clone: clone also copies the frozen state, which would
    # make the pop in next_request_id raise for a frozen id list.
    @request_ids = request_ids.dup
    @semaphore = Mutex.new
    @threads.times.map { Thread.new { scrape_requests(&block) } }.each(&:join)
  end

  private

  # Body of one worker: open a browser, log in, then keep taking ids until
  # none are left (or a page produces no request).
  def scrape_requests
    b = Watir::Browser.new
    begin
      @login_command.execute b
      while (request = scrape_next_request(b))
        yield request
      end
    ensure
      # Close the browser even when login, scraping or the caller's block
      # raises, so a failing worker does not leave a browser open.
      b.close
    end
  end

  # Takes the next pending id, loads its ticket page and returns the scraped
  # request, or nil when no ids are left.
  def scrape_next_request(b)
    id = next_request_id
    return unless id
    @navigator.goto "hd/ticket/euTicketView.ssp?ticket_id=#{id}&log=show", b
    RequestPage.new(b).get_request
  end

  # Removes and returns the last pending id under the mutex; nil once the
  # list is exhausted.
  def next_request_id
    @semaphore.synchronize { @request_ids.pop }
  end

end
@@ -0,0 +1,31 @@
1
+ require 'watir-webdriver'
2
+
3
# Collects the ids of the requests listed on the ticket search page for a set
# of users.
class RequestFinder

  # navigator - object responding to goto(relative_url, browser).
  def initialize(navigator)
    @navigator = navigator
  end

  # Logs in as each user in turn (all in one browser), runs the ticket search
  # and gathers the ticket ids linked from the result page.
  #
  # users - enumerable of hashes with :username and :password keys.
  #
  # Returns the ids as a sorted array of Integers without duplicates, so a
  # ticket that is linked more than once or shows up for several users is
  # only reported (and later scraped) once.
  def get_request_ids_for_users(users)
    b = Watir::Browser.new
    begin
      users.flat_map { |user| get_request_ids_for_user user, b }.uniq.sort
    ensure
      # Close the browser even if a login or search raises.
      b.close
    end
  end

  private

  # Logs in as user, opens the ticket search page and returns the ids found.
  def get_request_ids_for_user(user, b)
    login_command = LoginCommand.new user[:username], user[:password], @navigator
    login_command.execute b
    @navigator.goto 'hd/ticket/euTicketFind.ssp', b
    get_all_request_ids b
  end

  # Clicks 'Find' and extracts the numeric ticket_id from every link whose
  # href contains 'ticket_id='.
  def get_all_request_ids(b)
    b.button(value: 'Find').click
    b.links(href: /ticket_id=/).to_a.map { |link| link.href[/ticket_id=(\d+)/, 1].to_i }
  end

end
31
+
@@ -0,0 +1,25 @@
1
+ require 'time'
2
+
3
# Reads a request's history table: the full history text plus, when present,
# who resolved the request and when.
class RequestHistoryTable

  # table - element responding to trs (its rows, each responding to text)
  #         and to text.
  def initialize(table)
    @table = table
  end

  # Returns a hash that always contains :history (the table's text). When a
  # row mentioning "Resolved by" is found and the row directly above it reads
  # "<time> by <name>", the hash also contains :resolved_on (a Time) and
  # :resolved_by (a String), inserted before :history.
  #
  # Raises ArgumentError (from Time.parse) if the text before " by " is not a
  # parseable time.
  def get_resolution_info
    info = {}
    resolved_on, resolved_by = resolution_details

    if resolved_on
      info[:resolved_on] = Time.parse resolved_on
      info[:resolved_by] = resolved_by
    end

    info[:history] = @table.text
    info
  end

  private

  # Returns [time_text, name] taken from the row above the "Resolved by" row,
  # or nil when there is no such row, no row above it, or that row does not
  # have the "<something> by <something>" shape.
  def resolution_details
    rows = @table.trs.to_a
    resolved_row_index = rows.index { |row| row.text =~ /Resolved by/ }
    # Index 0 has no row above it; rows[-1] would silently wrap around to the
    # last row of the table.
    return unless resolved_row_index && resolved_row_index > 0

    match = rows[resolved_row_index - 1].text.match(/(.+) by (.+)/)
    match && match.captures
  end

end
@@ -0,0 +1,43 @@
1
# Time.parse is used below; require it here so this file does not depend on
# another file having loaded it first.
require 'time'

# Reads the fields of a request from the cells of its info table. Every field
# lives at a fixed position in the table's list of td cells.
class RequestInfoTable

  # table - element responding to tds; the cells are captured once here.
  def initialize(table)
    @cells = table.tds.to_a
  end

  # Returns a hash of the request's fields. All values are the cells' text,
  # except :deadline, :submitted_on and :assigned_on, which are parsed into
  # Time objects, and :closed_on, which is a Time unless the cell reads
  # 'None' (then the string 'None' is returned as is).
  #
  # Raises ArgumentError (from Time.parse) when a time cell cannot be parsed.
  def get_request_info
    {
      title: cell(0),
      status: cell(2),
      service: cell(3),
      request_type: cell(4),
      time_spent: cell(5),
      priority: cell(7),
      deadline: time_cell(8),
      submitted_to: cell(10),
      submitted_by: cell(11),
      submitted_on: time_cell(12),
      assigned_to: cell(15),
      assigned_by: cell(16),
      assigned_on: time_cell(17),
      department_id: cell(19),
      closed_by: cell(20),
      closed_on: maybe_time_cell(21, 'None')
    }
  end

  private

  # Text of the cell, or a Time parsed from it unless it equals
  # non_time_value. The cell is read only once and the text reused, instead
  # of asking the cell for its text again for every branch.
  def maybe_time_cell(index, non_time_value)
    cell(index) { |s| s == non_time_value ? s : Time.parse(s) }
  end

  # Cell text parsed as a Time.
  def time_cell(index)
    cell(index) { |s| Time.parse s }
  end

  # Text of the cell at index; when a block is given, the block's result for
  # that text is returned instead.
  def cell(index)
    text = @cells[index].text
    block_given? ? yield(text) : text
  end

end
@@ -0,0 +1,25 @@
1
+ require_relative 'request_info_table'
2
+ require_relative 'request_history_table'
3
+
4
# Reads one request from the ticket page currently shown in a browser.
class RequestPage

  # b - browser (or page) responding to tables and text.
  def initialize(b)
    @b = b
  end

  # Builds the request hash: :id first, then the fields read from the second
  # table on the page (index 1), then the resolution/history fields read from
  # the fourth table (index 3). Later fields win on a key clash.
  def get_request
    info_table = RequestInfoTable.new(@b.tables[1])
    history_table = RequestHistoryTable.new(@b.tables[3])

    { id: request_id }
      .merge(info_table.get_request_info)
      .merge(history_table.get_resolution_info)
  end

  private

  # The digits following "Request #" in the page text, as a String; nil when
  # the page text has no such marker.
  def request_id
    @b.text[/Request #(\d+)/, 1]
  end

end
@@ -0,0 +1,51 @@
1
+ $stdout.sync = true
2
+
3
+ require 'fileutils'
4
+ require 'json'
5
+ require_relative 'lib/login_command'
6
+ require_relative 'lib/navigator'
7
+ require_relative 'lib/parallel_request_scraper'
8
+ require_relative 'lib/request_finder'
9
+
10
# Entry point of the scraper: finds or enumerates request ids, scrapes them
# and writes every request to its own JSON file.
class TrackIt

  # base_url           - base URL handed to the Navigator.
  # username, password - credentials used by the scraping workers.
  # options            - :threads    number of scraper threads (default 1)
  #                      :output_dir directory for the JSON files (default
  #                                  'output/<year>-<month>-<day>-<hour>-<minute>'
  #                                  of the moment this object is created)
  def initialize(base_url, username, password, options={})
    navigator = Navigator.new(base_url)
    login_command = LoginCommand.new(username, password, navigator)
    @parallel_request_scraper =
      ParallelRequestScraper.new(navigator, login_command, options[:threads] || 1)
    @request_finder = RequestFinder.new(navigator)
    @output_dir = options[:output_dir] || default_output_dir
  end

  # Scrapes every request the request finder reports for the given users.
  def scrape_requests_for_users(users)
    scrape_requests(@request_finder.get_request_ids_for_users(users))
  end

  # Scrapes the requests with ids from_id through to_id, both inclusive.
  def scrape_requests_in_range(from_id, to_id)
    scrape_requests(Array(from_id..to_id))
  end

  private

  # Makes sure the output directory exists, then writes one file per request
  # handed back by the scraper.
  def scrape_requests(request_ids)
    FileUtils.mkdir_p(@output_dir)
    @parallel_request_scraper.scrape(request_ids) do |request|
      write_request_file(request)
    end
  end

  # Stores the request as '<id>.json' in the output directory and prints a
  # dot as a progress marker.
  def write_request_file(request)
    path = File.join(@output_dir, "#{request[:id]}.json")
    File.write(path, request.to_json)
    print '.'
  end

  def default_output_dir
    timestamp = Time.now.strftime('%Y-%m-%d-%H-%M')
    File.join('output', timestamp)
  end

end
@@ -0,0 +1 @@
1
+ require_relative '2003.10.1/trackit'
metadata ADDED
@@ -0,0 +1,75 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: trackit_scraper
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Matthew Riley
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-07-06 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: json
16
+ requirement: &70292605876980 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 1.7.7
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *70292605876980
25
+ - !ruby/object:Gem::Dependency
26
+ name: watir-webdriver
27
+ requirement: &70292605876500 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ~>
31
+ - !ruby/object:Gem::Version
32
+ version: 0.6.4
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *70292605876500
36
+ description:
37
+ email: matthew-github@matthewriley.name
38
+ executables: []
39
+ extensions: []
40
+ extra_rdoc_files: []
41
+ files:
42
+ - lib/trackit_scraper/2003.10.1/lib/login_command.rb
43
+ - lib/trackit_scraper/2003.10.1/lib/navigator.rb
44
+ - lib/trackit_scraper/2003.10.1/lib/parallel_request_scraper.rb
45
+ - lib/trackit_scraper/2003.10.1/lib/request_finder.rb
46
+ - lib/trackit_scraper/2003.10.1/lib/request_history_table.rb
47
+ - lib/trackit_scraper/2003.10.1/lib/request_info_table.rb
48
+ - lib/trackit_scraper/2003.10.1/lib/request_page.rb
49
+ - lib/trackit_scraper/2003.10.1/trackit.rb
50
+ - lib/trackit_scraper/2003.10.1.rb
51
+ homepage:
52
+ licenses: []
53
+ post_install_message:
54
+ rdoc_options: []
55
+ require_paths:
56
+ - lib
57
+ required_ruby_version: !ruby/object:Gem::Requirement
58
+ none: false
59
+ requirements:
60
+ - - ! '>='
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
63
+ required_rubygems_version: !ruby/object:Gem::Requirement
64
+ none: false
65
+ requirements:
66
+ - - ! '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ requirements: []
70
+ rubyforge_project:
71
+ rubygems_version: 1.8.15
72
+ signing_key:
73
+ specification_version: 3
74
+ summary: Screen scrapes data from the Track-It help desk web application.
75
+ test_files: []