trackit_scraper 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/trackit_scraper/2003.10.1/lib/login_command.rb +22 -0
- data/lib/trackit_scraper/2003.10.1/lib/navigator.rb +12 -0
- data/lib/trackit_scraper/2003.10.1/lib/parallel_request_scraper.rb +45 -0
- data/lib/trackit_scraper/2003.10.1/lib/request_finder.rb +31 -0
- data/lib/trackit_scraper/2003.10.1/lib/request_history_table.rb +25 -0
- data/lib/trackit_scraper/2003.10.1/lib/request_info_table.rb +43 -0
- data/lib/trackit_scraper/2003.10.1/lib/request_page.rb +25 -0
- data/lib/trackit_scraper/2003.10.1/trackit.rb +51 -0
- data/lib/trackit_scraper/2003.10.1.rb +1 -0
- metadata +75 -0
@@ -0,0 +1,22 @@
|
|
1
|
+
# Logs a single user in to the Track-It web application through a browser
# object supplied by the caller.
class LoginCommand
  # @param username [String] value typed into the `user_id` field
  # @param password [String] value typed into the `user_pwd` field
  # @param navigator [#goto] object called as `goto(relative_path, browser)`
  def initialize(username, password, navigator)
    @username = username
    @password = password
    @navigator = navigator
  end

  # Sends the browser to 'hd/index.ssp' through the navigator and then fills
  # in and submits the log-on form.
  #
  # @param b [Object] browser responding to `text_field` and `button`
  #   (Watir-style locators)
  # @return [Object] whatever the button's `click` returns
  def execute(b)
    @navigator.goto 'hd/index.ssp', b
    login b
  end

  private

  # Types the stored credentials into the form and clicks 'Log on'.
  def login(b)
    b.text_field(name: 'user_id').set @username
    b.text_field(name: 'user_pwd').set @password
    b.button(value: 'Log on').click
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'thread'
|
2
|
+
require 'watir-webdriver'
|
3
|
+
require_relative 'request_page'
|
4
|
+
|
5
|
+
# Scrapes a list of request ids using several worker threads. Each worker
# opens its own Watir browser, logs in once, and then keeps taking ids from a
# shared list until none are left.
class ParallelRequestScraper
  # @param navigator [#goto] object called as `goto(relative_path, browser)`
  # @param login_command [#execute] object called as `execute(browser)`
  # @param threads [Integer] number of worker threads (one browser each)
  def initialize(navigator, login_command, threads)
    @navigator = navigator
    @login_command = login_command
    @threads = threads
  end

  # Scrapes every id in +request_ids+ and yields each scraped request to the
  # given block. The block is called from the worker threads, so with more
  # than one thread it may be called concurrently.
  #
  # An exception raised inside a worker is re-raised here by Thread#join.
  #
  # @param request_ids [Array] ids to scrape; the array itself is not modified
  # @yieldparam request [Hash] the value returned by RequestPage#get_request
  # @return [Array<Thread>] the (joined) worker threads
  def scrape(request_ids, &block)
    # Work on a copy so the caller's array is left untouched by the pops.
    @request_ids = request_ids.clone
    @semaphore = Mutex.new
    workers = @threads.times.map { Thread.new { scrape_requests(&block) } }
    workers.each { |t| t.join }
  end

  private

  # Body of one worker thread: open a browser, log in, scrape until the
  # shared id list is exhausted.
  def scrape_requests
    b = Watir::Browser.new
    begin
      @login_command.execute b

      # The mutex-guarded pop inside scrape_next_request is the only check for
      # "no more work"; a nil result ends the loop.
      while (request = scrape_next_request(b))
        yield request
      end
    ensure
      # Close the browser even when login or scraping raised, so a failing
      # worker does not leave a browser behind.
      b.close
    end
  end

  # Takes the next id, opens its ticket page and scrapes it.
  #
  # @return [Hash, nil] the scraped request, or nil when no ids are left
  def scrape_next_request(b)
    id = next_request_id
    return unless id
    @navigator.goto "hd/ticket/euTicketView.ssp?ticket_id=#{id}&log=show", b
    request_page = RequestPage.new b
    request_page.get_request
  end

  # Removes and returns the last remaining id (nil when the list is empty).
  # Guarded by the mutex because all workers share the list.
  def next_request_id
    @semaphore.synchronize { @request_ids.pop }
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'watir-webdriver'
|
2
|
+
|
3
|
+
# Collects the ids of the requests that a set of users can find through the
# 'hd/ticket/euTicketFind.ssp' page.
class RequestFinder
  # @param navigator [#goto] object called as `goto(relative_path, browser)`
  def initialize(navigator)
    @navigator = navigator
  end

  # Logs in as each user in turn (all in one browser), runs the find form and
  # gathers the ticket ids linked from the result page.
  #
  # @param users [Array<Hash>] hashes with :username and :password keys
  # @return [Array<Integer>] sorted ids; an id found for several users is
  #   listed once so it is not scraped repeatedly
  def get_request_ids_for_users(users)
    b = Watir::Browser.new
    begin
      users.flat_map { |user| get_request_ids_for_user user, b }.uniq.sort
    ensure
      # Always release the browser, including when a login or lookup raises.
      b.close
    end
  end

  private

  # Logs in as +user+ and returns the ids shown by the find page.
  def get_request_ids_for_user(user, b)
    login_command = LoginCommand.new user[:username], user[:password], @navigator
    login_command.execute b
    @navigator.goto 'hd/ticket/euTicketFind.ssp', b
    get_all_request_ids b
  end

  # Clicks 'Find' and extracts the numeric ticket_id from every link whose
  # href contains "ticket_id=".
  def get_all_request_ids(b)
    b.button(value: 'Find').click
    b.links(href: /ticket_id=/).to_a.map { |link| link.href[/ticket_id=(\d+)/, 1].to_i }
  end
end
|
31
|
+
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'time'
|
2
|
+
|
3
|
+
# Reads resolution details out of a request's history table.
class RequestHistoryTable
  # @param table [Object] table element responding to `trs` (rows, each
  #   responding to `text`) and `text`
  def initialize(table)
    @table = table
  end

  # Builds a hash describing the request's history.
  #
  # When a row containing "Resolved by" exists and the row directly above it
  # has the form "<timestamp> by <name>", the hash carries :resolved_on (Time)
  # and :resolved_by (String). :history always holds the table's full text.
  #
  # @return [Hash]
  # @raise [ArgumentError] if the timestamp part cannot be parsed by Time.parse
  def get_resolution_info
    info = {}
    resolution = find_resolution

    if resolution
      info[:resolved_on] = Time.parse resolution[0]
      info[:resolved_by] = resolution[1]
    end

    info[:history] = @table.text
    info
  end

  private

  # Returns [timestamp_text, resolver_name] taken from the row above the first
  # "Resolved by" row, or nil when there is no such row, no row above it, or
  # the row above does not match "<something> by <something>".
  def find_resolution
    rows = @table.trs.to_a
    resolved_row_index = rows.index { |row| row.text.include?('Resolved by') }
    # Index 0 has no row above it; without this guard rows[-1] would silently
    # pick the LAST row of the table instead.
    return unless resolved_row_index && resolved_row_index > 0

    rows[resolved_row_index - 1].text.scan(/(.+) by (.+)/)[0]
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# Time.parse lives in the 'time' standard library; load it here so this file
# does not depend on some other file having required it first.
require 'time'

# Reads the fields of a request out of its info table, addressing the table's
# cells by fixed position.
class RequestInfoTable
  # @param table [Object] table element responding to `tds`; the cells are
  #   captured once, at construction time
  def initialize(table)
    @cells = table.tds.to_a
  end

  # Builds a hash of the request's fields from fixed cell positions.
  # :deadline, :submitted_on and :assigned_on are parsed into Time objects;
  # :closed_on is a Time unless the cell reads 'None', in which case the
  # string 'None' is returned as is. Every other value is the cell's text.
  #
  # @return [Hash]
  # @raise [ArgumentError] if a time cell cannot be parsed by Time.parse
  def get_request_info
    {
      title: cell(0),
      status: cell(2),
      service: cell(3),
      request_type: cell(4),
      time_spent: cell(5),
      priority: cell(7),
      deadline: time_cell(8),
      submitted_to: cell(10),
      submitted_by: cell(11),
      submitted_on: time_cell(12),
      assigned_to: cell(15),
      assigned_by: cell(16),
      assigned_on: time_cell(17),
      department_id: cell(19),
      closed_by: cell(20),
      closed_on: maybe_time_cell(21, 'None')
    }
  end

  private

  # Returns the cell's text when it equals +non_time_value+, otherwise the
  # text parsed as a Time. The text is read from the cell only once.
  def maybe_time_cell(index, non_time_value)
    cell(index) { |s| s == non_time_value ? s : Time.parse(s) }
  end

  # Returns the cell's text parsed as a Time.
  def time_cell(index)
    cell(index) { |s| Time.parse s }
  end

  # Returns the text of the cell at +index+, passed through the block when
  # one is given.
  def cell(index)
    text = @cells[index].text
    block_given? ? yield(text) : text
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require_relative 'request_info_table'
|
2
|
+
require_relative 'request_history_table'
|
3
|
+
|
4
|
+
# Scrapes one request (ticket) page that is already loaded in a browser.
class RequestPage
  # Positions, within the page's tables, of the two tables that are read.
  INFO_TABLE_INDEX = 1
  HISTORY_TABLE_INDEX = 3

  # @param b [Object] browser showing the request page; must respond to
  #   `tables` and `text`
  def initialize(b)
    @b = b
  end

  # Combines the request id found in the page text with the fields from the
  # info table and the resolution details from the history table.
  #
  # @return [Hash] :id first, then the info-table keys, then the history keys;
  #   on a key clash the later source wins (Hash#merge!)
  def get_request
    request_info_table = RequestInfoTable.new @b.tables[INFO_TABLE_INDEX]
    request_history_table = RequestHistoryTable.new @b.tables[HISTORY_TABLE_INDEX]
    r = { id: request_id }
    r.merge! request_info_table.get_request_info
    r.merge! request_history_table.get_resolution_info
    r
  end

  private

  # The digits following "Request #" in the page text, as a String
  # (nil when the page text contains no such marker).
  def request_id
    @b.text[/Request #(\d+)/, 1]
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# Write stdout unbuffered, so the '.' printed per scraped request shows up
# immediately instead of when the buffer fills.
$stdout.sync = true
|
2
|
+
|
3
|
+
require 'fileutils'
|
4
|
+
require 'json'
|
5
|
+
require_relative 'lib/login_command'
|
6
|
+
require_relative 'lib/navigator'
|
7
|
+
require_relative 'lib/parallel_request_scraper'
|
8
|
+
require_relative 'lib/request_finder'
|
9
|
+
|
10
|
+
# Entry point of the scraper: finds request ids and writes every scraped
# request to its own JSON file in an output directory.
class TrackIt
  # @param base_url [String] passed to Navigator.new
  # @param username [String] account used for scraping the request pages
  # @param password [String] password of that account
  # @param options [Hash] optional settings:
  #   :threads    - number of scraper threads (default 1)
  #   :output_dir - directory for the JSON files
  #                 (default "output/<YYYY-MM-DD-HH-MM>" at construction time)
  def initialize(base_url, username, password, options={})
    navigator = Navigator.new base_url
    login_command = LoginCommand.new username, password, navigator
    threads = options[:threads] || 1
    @parallel_request_scraper = ParallelRequestScraper.new navigator, login_command, threads
    @request_finder = RequestFinder.new navigator
    @output_dir = options[:output_dir] || default_output_dir
  end

  # Scrapes every request the request finder returns for the given users.
  #
  # @param users [Array<Hash>] passed on to
  #   RequestFinder#get_request_ids_for_users
  def scrape_requests_for_users(users)
    request_ids = @request_finder.get_request_ids_for_users users
    scrape_requests request_ids
  end

  # Scrapes every request id in the inclusive range from_id..to_id.
  #
  # @param from_id [Integer] first id
  # @param to_id [Integer] last id
  def scrape_requests_in_range(from_id, to_id)
    request_ids = (from_id..to_id).to_a
    scrape_requests request_ids
  end

  private

  # Creates the output directory if needed, then writes one file per request
  # as the scraper yields them.
  def scrape_requests(request_ids)
    FileUtils.mkdir_p @output_dir
    @parallel_request_scraper.scrape(request_ids) { |request| write_request_file request }
  end

  # Serializes the request to JSON and prints a '.' as a progress marker.
  def write_request_file(request)
    File.write request_file_path(request), request.to_json
    print '.'
  end

  # "<output_dir>/<request id>.json"
  def request_file_path(request)
    File.join @output_dir, "#{request[:id]}.json"
  end

  # Timestamped (minute resolution) directory under 'output'.
  def default_output_dir
    File.join 'output', Time.now.strftime('%Y-%m-%d-%H-%M')
  end
end
|
@@ -0,0 +1 @@
|
|
1
|
+
require_relative '2003.10.1/trackit'
|
metadata
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: trackit_scraper
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Matthew Riley
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-07-06 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: json
|
16
|
+
requirement: &70292605876980 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 1.7.7
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *70292605876980
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: watir-webdriver
|
27
|
+
requirement: &70292605876500 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ~>
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 0.6.4
|
33
|
+
type: :runtime
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *70292605876500
|
36
|
+
description:
|
37
|
+
email: matthew-github@matthewriley.name
|
38
|
+
executables: []
|
39
|
+
extensions: []
|
40
|
+
extra_rdoc_files: []
|
41
|
+
files:
|
42
|
+
- lib/trackit_scraper/2003.10.1/lib/login_command.rb
|
43
|
+
- lib/trackit_scraper/2003.10.1/lib/navigator.rb
|
44
|
+
- lib/trackit_scraper/2003.10.1/lib/parallel_request_scraper.rb
|
45
|
+
- lib/trackit_scraper/2003.10.1/lib/request_finder.rb
|
46
|
+
- lib/trackit_scraper/2003.10.1/lib/request_history_table.rb
|
47
|
+
- lib/trackit_scraper/2003.10.1/lib/request_info_table.rb
|
48
|
+
- lib/trackit_scraper/2003.10.1/lib/request_page.rb
|
49
|
+
- lib/trackit_scraper/2003.10.1/trackit.rb
|
50
|
+
- lib/trackit_scraper/2003.10.1.rb
|
51
|
+
homepage:
|
52
|
+
licenses: []
|
53
|
+
post_install_message:
|
54
|
+
rdoc_options: []
|
55
|
+
require_paths:
|
56
|
+
- lib
|
57
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
58
|
+
none: false
|
59
|
+
requirements:
|
60
|
+
- - ! '>='
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '0'
|
63
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
64
|
+
none: false
|
65
|
+
requirements:
|
66
|
+
- - ! '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
requirements: []
|
70
|
+
rubyforge_project:
|
71
|
+
rubygems_version: 1.8.15
|
72
|
+
signing_key:
|
73
|
+
specification_version: 3
|
74
|
+
summary: Screen scrapes data from the Track-It help desk web application.
|
75
|
+
test_files: []
|