trackit_scraper 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/trackit_scraper/2003.10.1/lib/login_command.rb +22 -0
- data/lib/trackit_scraper/2003.10.1/lib/navigator.rb +12 -0
- data/lib/trackit_scraper/2003.10.1/lib/parallel_request_scraper.rb +45 -0
- data/lib/trackit_scraper/2003.10.1/lib/request_finder.rb +31 -0
- data/lib/trackit_scraper/2003.10.1/lib/request_history_table.rb +25 -0
- data/lib/trackit_scraper/2003.10.1/lib/request_info_table.rb +43 -0
- data/lib/trackit_scraper/2003.10.1/lib/request_page.rb +25 -0
- data/lib/trackit_scraper/2003.10.1/trackit.rb +51 -0
- data/lib/trackit_scraper/2003.10.1.rb +1 -0
- metadata +75 -0
@@ -0,0 +1,22 @@
|
|
1
|
+
# Logs a user in through the application's login form.
#
# The navigator is only asked to `goto` the login page; the browser object
# passed to #execute must respond to `text_field` and `button`.
class LoginCommand
  # username/password - the values typed into the login form.
  # navigator         - object responding to `goto(path, browser)`.
  def initialize(username, password, navigator)
    @navigator = navigator
    @username = username
    @password = password
  end

  # Opens the login page in browser +b+ and submits the credentials.
  # Returns whatever clicking the 'Log on' button returns.
  def execute(b)
    @navigator.goto('hd/index.ssp', b)
    submit_credentials(b)
  end

  private

  # Fills in the user id and password fields (in that order), then clicks
  # the 'Log on' button.
  def submit_credentials(browser)
    { 'user_id' => @username, 'user_pwd' => @password }.each do |field_name, value|
      browser.text_field(name: field_name).set(value)
    end
    browser.button(value: 'Log on').click
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'thread'
|
2
|
+
require 'watir-webdriver'
|
3
|
+
require_relative 'request_page'
|
4
|
+
|
5
|
+
# Scrapes a list of requests using several browser sessions at once.
#
# Every worker thread opens its own Watir::Browser, logs in with the given
# login command and then keeps taking request IDs from a shared list until
# none are left.
class ParallelRequestScraper
  # navigator     - object responding to `goto(path, browser)`.
  # login_command - object responding to `execute(browser)`.
  # threads       - number of worker threads (and browsers) to start.
  def initialize(navigator, login_command, threads)
    @navigator = navigator
    @login_command = login_command
    @threads = threads
  end

  # Scrapes every request in +request_ids+ and yields each scraped request
  # to the block. The block is called from the worker threads, so with more
  # than one thread it may run concurrently.
  #
  # The caller's array is not modified (a copy is consumed). Blocks until all
  # workers have finished; an exception raised in a worker is re-raised here
  # by Thread#join. Returns the array of worker threads.
  def scrape(request_ids, &block)
    @request_ids = request_ids.clone
    @semaphore = Mutex.new
    workers = @threads.times.map { Thread.new { scrape_requests(&block) } }
    workers.each(&:join)
  end

  private

  # Body of one worker: open a browser, log in, scrape until the shared list
  # is exhausted. The browser is closed in an `ensure` so that a failure
  # during login or scraping does not leave a browser process behind.
  def scrape_requests
    b = Watir::Browser.new
    begin
      @login_command.execute b
      loop do
        request = scrape_next_request b
        break unless request
        yield request
      end
    ensure
      b.close
    end
  end

  # Takes the next ID and scrapes its request page.
  # Returns nil when no IDs are left.
  def scrape_next_request(b)
    id = next_request_id
    return unless id
    @navigator.goto "hd/ticket/euTicketView.ssp?ticket_id=#{id}&log=show", b
    RequestPage.new(b).get_request
  end

  # Removes and returns the last remaining ID (nil when empty). Guarded by
  # the mutex because all workers share the same list.
  def next_request_id
    @semaphore.synchronize { @request_ids.pop }
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'watir-webdriver'
|
2
|
+
|
3
|
+
# Collects the IDs of the requests that a set of users can find through the
# application's ticket search page.
class RequestFinder
  # navigator - object responding to `goto(path, browser)`.
  def initialize(navigator)
    @navigator = navigator
  end

  # users - array of hashes with :username and :password keys.
  #
  # Logs in as each user in turn (all in one browser), runs the search and
  # returns the request IDs found as a sorted array of integers. An ID that
  # shows up for more than one user is returned once, so the same request is
  # not scraped twice. The browser is closed even if a login or search fails.
  def get_request_ids_for_users(users)
    b = Watir::Browser.new
    begin
      users.flat_map { |user| get_request_ids_for_user user, b }.uniq.sort
    ensure
      b.close
    end
  end

  private

  # Logs in as +user+, opens the search page and returns the IDs it lists.
  def get_request_ids_for_user(user, b)
    login_command = LoginCommand.new user[:username], user[:password], @navigator
    login_command.execute b
    @navigator.goto 'hd/ticket/euTicketFind.ssp', b
    get_all_request_ids b
  end

  # Clicks 'Find' and extracts the numeric ticket_id from every link whose
  # href contains "ticket_id=".
  def get_all_request_ids(b)
    b.button(value: 'Find').click
    b.links(href: /ticket_id=/).to_a.map { |link| link.href[/ticket_id=(\d+)/, 1].to_i }
  end
end
|
31
|
+
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'time'
|
2
|
+
|
3
|
+
# Wraps the history table of a request page and extracts resolution details.
class RequestHistoryTable
  # table - object responding to `trs` (its rows, each with `text`) and `text`.
  def initialize(table)
    @table = table
  end

  # Returns a hash that always contains :history (the table's full text).
  # When a row containing "Resolved by" is found and the row directly above
  # it reads "<time> by <name>", the hash also contains :resolved_on (Time)
  # and :resolved_by (String), placed before :history.
  #
  # Raises ArgumentError (from Time.parse) if the text before " by " is not a
  # parseable time.
  def get_resolution_info
    info = resolution_details(@table.trs.to_a)
    info[:history] = @table.text
    info
  end

  private

  # Looks at the row above the first "Resolved by" row. Returns {} when there
  # is no such row, when it is the first row, or when the row above does not
  # have the "<time> by <name>" shape.
  def resolution_details(rows)
    resolved_index = rows.index { |row| row.text =~ /Resolved by/ }
    # Index 0 has no row above it; rows[-1] would silently wrap around to the
    # last row of the table.
    return {} unless resolved_index && resolved_index > 0

    captures = rows[resolved_index - 1].text.scan(/(.+) by (.+)/).first
    return {} unless captures

    { resolved_on: Time.parse(captures[0]), resolved_by: captures[1] }
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# Time.parse lives in the 'time' stdlib extension; load it here so this class
# does not depend on another file having required it first.
require 'time'

# Wraps the info table of a request page and reads its fields by cell position.
class RequestInfoTable
  # table - object responding to `tds`; the cells are captured once, here.
  def initialize(table)
    @cells = table.tds.to_a
  end

  # Returns a hash of the request's fields. Each value is the text of the
  # cell at a fixed position; :deadline, :submitted_on and :assigned_on are
  # parsed into Time. :closed_on is a Time unless the cell reads 'None', in
  # which case the string 'None' is returned.
  #
  # Raises ArgumentError (from Time.parse) when a time cell is not parseable.
  def get_request_info
    {
      title: cell(0),
      status: cell(2),
      service: cell(3),
      request_type: cell(4),
      time_spent: cell(5),
      priority: cell(7),
      deadline: time_cell(8),
      submitted_to: cell(10),
      submitted_by: cell(11),
      submitted_on: time_cell(12),
      assigned_to: cell(15),
      assigned_by: cell(16),
      assigned_on: time_cell(17),
      department_id: cell(19),
      closed_by: cell(20),
      closed_on: maybe_time_cell(21, 'None')
    }
  end

  private

  # Returns the cell's text unchanged when it equals +non_time_value+,
  # otherwise the text parsed as a Time. The cell is read only once.
  def maybe_time_cell(index, non_time_value)
    cell(index) { |s| s == non_time_value ? s : Time.parse(s) }
  end

  # Returns the cell's text parsed as a Time.
  def time_cell(index)
    cell(index) { |s| Time.parse s }
  end

  # Returns the text of the cell at +index+, passed through the block when
  # one is given.
  def cell(index)
    text = @cells[index].text
    block_given? ? yield(text) : text
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require_relative 'request_info_table'
|
2
|
+
require_relative 'request_history_table'
|
3
|
+
|
4
|
+
# Reads a single request from the request page currently shown in a browser.
class RequestPage
  # b - browser object responding to `tables` and `text`.
  def initialize(b)
    @b = b
  end

  # Returns one hash for the request: :id first, followed by the fields from
  # the info table (second table on the page) and the resolution/history
  # fields from the history table (fourth table on the page).
  def get_request
    info_table = RequestInfoTable.new(@b.tables[1])
    history_table = RequestHistoryTable.new(@b.tables[3])

    { id: request_id }
      .merge(info_table.get_request_info)
      .merge(history_table.get_resolution_info)
  end

  private

  # The digits following "Request #" in the page text, as a String;
  # nil when the page text contains no such marker.
  def request_id
    found = /Request #(\d+)/.match(@b.text)
    found && found[1]
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# Turn off stdout buffering so output appears as soon as it is printed
# (TrackIt#write_request_file prints a '.' for every request it saves).
$stdout.sync = true
|
2
|
+
|
3
|
+
require 'fileutils'
|
4
|
+
require 'json'
|
5
|
+
require_relative 'lib/login_command'
|
6
|
+
require_relative 'lib/navigator'
|
7
|
+
require_relative 'lib/parallel_request_scraper'
|
8
|
+
require_relative 'lib/request_finder'
|
9
|
+
|
10
|
+
# Entry point of the scraper: finds request IDs, scrapes them and writes one
# JSON file per request into an output directory.
class TrackIt
  # base_url           - passed to Navigator.new.
  # username, password - credentials used by the scraping browsers.
  # options            - :threads (default 1) and :output_dir (default
  #                      "output/<YYYY-MM-DD-HH-MM>" based on the current time).
  def initialize(base_url, username, password, options={})
    navigator = Navigator.new(base_url)
    login_command = LoginCommand.new(username, password, navigator)
    thread_count = options[:threads] || 1

    @parallel_request_scraper = ParallelRequestScraper.new(navigator, login_command, thread_count)
    @request_finder = RequestFinder.new(navigator)
    @output_dir = options[:output_dir] || default_output_dir
  end

  # Scrapes every request the request finder returns for +users+.
  def scrape_requests_for_users(users)
    scrape_requests(@request_finder.get_request_ids_for_users(users))
  end

  # Scrapes every request ID from +from_id+ to +to_id+, inclusive.
  def scrape_requests_in_range(from_id, to_id)
    scrape_requests((from_id..to_id).to_a)
  end

  private

  # Creates the output directory if needed, then writes a file for each
  # request the scraper yields.
  def scrape_requests(request_ids)
    FileUtils.mkdir_p(@output_dir)
    @parallel_request_scraper.scrape(request_ids) do |request|
      write_request_file(request)
    end
  end

  # Writes +request+ as JSON to "<output_dir>/<id>.json" and prints a '.'
  # as a progress mark.
  def write_request_file(request)
    path = File.join(@output_dir, "#{request[:id]}.json")
    File.write(path, request.to_json)
    print '.'
  end

  # "output/<timestamp>", with the timestamp at minute resolution.
  def default_output_dir
    timestamp = Time.now.strftime('%Y-%m-%d-%H-%M')
    File.join('output', timestamp)
  end
end
|
@@ -0,0 +1 @@
|
|
1
|
+
require_relative '2003.10.1/trackit'
|
metadata
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: trackit_scraper
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Matthew Riley
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-07-06 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: json
|
16
|
+
requirement: &70292605876980 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 1.7.7
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *70292605876980
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: watir-webdriver
|
27
|
+
requirement: &70292605876500 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ~>
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 0.6.4
|
33
|
+
type: :runtime
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *70292605876500
|
36
|
+
description:
|
37
|
+
email: matthew-github@matthewriley.name
|
38
|
+
executables: []
|
39
|
+
extensions: []
|
40
|
+
extra_rdoc_files: []
|
41
|
+
files:
|
42
|
+
- lib/trackit_scraper/2003.10.1/lib/login_command.rb
|
43
|
+
- lib/trackit_scraper/2003.10.1/lib/navigator.rb
|
44
|
+
- lib/trackit_scraper/2003.10.1/lib/parallel_request_scraper.rb
|
45
|
+
- lib/trackit_scraper/2003.10.1/lib/request_finder.rb
|
46
|
+
- lib/trackit_scraper/2003.10.1/lib/request_history_table.rb
|
47
|
+
- lib/trackit_scraper/2003.10.1/lib/request_info_table.rb
|
48
|
+
- lib/trackit_scraper/2003.10.1/lib/request_page.rb
|
49
|
+
- lib/trackit_scraper/2003.10.1/trackit.rb
|
50
|
+
- lib/trackit_scraper/2003.10.1.rb
|
51
|
+
homepage:
|
52
|
+
licenses: []
|
53
|
+
post_install_message:
|
54
|
+
rdoc_options: []
|
55
|
+
require_paths:
|
56
|
+
- lib
|
57
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
58
|
+
none: false
|
59
|
+
requirements:
|
60
|
+
- - ! '>='
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '0'
|
63
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
64
|
+
none: false
|
65
|
+
requirements:
|
66
|
+
- - ! '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
requirements: []
|
70
|
+
rubyforge_project:
|
71
|
+
rubygems_version: 1.8.15
|
72
|
+
signing_key:
|
73
|
+
specification_version: 3
|
74
|
+
summary: Screen scrapes data from the Track-It help desk web application.
|
75
|
+
test_files: []
|