aranha 0.5.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/aranha.rb +1 -0
- data/lib/aranha/parsers/base.rb +22 -2
- data/lib/aranha/parsers/html/base.rb +20 -2
- data/lib/aranha/parsers/html/item.rb +23 -0
- data/lib/aranha/parsers/html/item_list.rb +2 -0
- data/lib/aranha/parsers/html/node/default.rb +1 -1
- data/lib/aranha/parsers/spec/source_target_fixtures.rb +67 -0
- data/lib/aranha/parsers/spec/source_target_fixtures_example.rb +61 -0
- data/lib/aranha/selenium/auto_download_mime_types +685 -0
- data/lib/aranha/selenium/driver_factory.rb +43 -0
- data/lib/aranha/selenium/session.rb +74 -0
- data/lib/aranha/version.rb +1 -1
- metadata +54 -28
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d65a2ccecd09ab619dea2d76306dfc48c7aad83b0b111b4544afb8492a97ef04
|
4
|
+
data.tar.gz: fd1d57c4d7ec4a22f8bd5829b8ad9a76838b729d54440b68c559cdf8ba5f9482
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3b34ee01cfc1f6be364017da680c64b4abf7a5bc303b40edfdedf7532cdfd69cbe17d57349fac4fbc8881dc8db336f8a5738aa1976e9e5a3f60a09b8c4808f77
|
7
|
+
data.tar.gz: c5129c06f6ec81bfd02da79ccfa64b1187abf644e2271ac84ea30d24fc02b1608c13b74fbf3f962ecbe4a3d69955c08c51fd0ad1f692395e82ac15a78978f638
|
data/lib/aranha.rb
CHANGED
@@ -16,3 +16,4 @@ require_dependency 'aranha/parsers/html/base'
|
|
16
16
|
require_dependency 'aranha/parsers/html/item_list'
|
17
17
|
require_dependency 'aranha/parsers/invalid_state_exception'
|
18
18
|
require_dependency 'aranha/dom_elements_traverser'
|
19
|
+
require_dependency 'aranha/selenium/driver_factory'
|
data/lib/aranha/parsers/base.rb
CHANGED
@@ -6,6 +6,8 @@ require 'fileutils'
|
|
6
6
|
module Aranha
|
7
7
|
module Parsers
|
8
8
|
class Base
|
9
|
+
LOG_DIR_ENVVAR = 'ARANHA_PARSERS_LOG_DIR'
|
10
|
+
|
9
11
|
# Builds a parser bound to a source location.
#
# @param url the location of the content to be parsed; kept unchanged in @url
def initialize(url)
|
10
12
|
@url = url
|
11
13
|
end
|
@@ -67,14 +69,32 @@ module Aranha
|
|
67
69
|
end
|
68
70
|
|
69
71
|
# Dumps +content+ into this parser's log file, in binary mode.
#
# Nothing is written when #log_file yields no path.
#
# @param content the raw data to persist
# @return [Integer, nil] the value reported by the write, or nil when skipped
def log_content(content)
  target = log_file
  File.binwrite(target, content) if target
end
|
72
76
|
|
73
77
|
# Resolves the path of the log file used by this parser class.
#
# The file name is derived from the parser's class name (via
# String#parameterize — presumably the ActiveSupport extension; confirm it is
# loaded by the host application) and placed under #log_parsers_dir. The
# containing directory is created when missing.
#
# @return [String, nil] the log file path, or nil when no log directory is
#   configured
def log_file
  base_dir = log_parsers_dir
  return nil unless base_dir

  ::File.join(base_dir, "#{self.class.name.parameterize}.log").tap do |path|
    FileUtils.mkdir_p(File.dirname(path))
  end
end
|
84
|
+
|
85
|
+
# Chooses the directory that receives parser logs.
#
# Precedence: the environment variable named by LOG_DIR_ENVVAR, then
# "log/parsers" under Rails.root when #rails_root_exist? says Rails is usable.
#
# @return [String, Object, nil] the environment value, whatever
#   Rails.root.join returns, or nil when neither source is available
def log_parsers_dir
  ENV[LOG_DIR_ENVVAR] || (::Rails.root.join('log', 'parsers') if rails_root_exist?)
end
|
90
|
+
|
91
|
+
# Tells whether a usable Rails.root is available in this process.
#
# Rails is defined as a module, not a class, so the check must accept any
# Module; testing for Class made this method always answer false and the Rails
# log directory was never used. A nil root (Rails loaded without an
# application) also counts as "not available", so callers can safely call
# Rails.root.join afterwards.
#
# @return [Boolean] true only when ::Rails is defined, responds to +root+ and
#   that root is not nil
def rails_root_exist?
  rails = ::Object.const_get(:Rails)
  rails.is_a?(::Module) && rails.respond_to?(:root) && !rails.root.nil?
rescue NameError
  # Raised by const_get when no Rails constant exists.
  false
end
|
78
98
|
end
|
79
99
|
end
|
80
100
|
end
|
@@ -1,12 +1,26 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
require_relative '../base'
|
4
|
+
require_relative 'node/default'
|
5
5
|
|
6
6
|
module Aranha
|
7
7
|
module Parsers
|
8
8
|
module Html
|
9
9
|
class Base < ::Aranha::Parsers::Base
|
10
|
+
class << self
  # Declaration of a single field: its name, its type and the XPath used to
  # locate it.
  Field = Struct.new(:name, :type, :xpath)

  # Fields declared so far on this class.
  #
  # @return [Array<Field>] a copy, so callers cannot alter the declarations
  def fields
    (@fields ||= []).dup
  end

  # Declares a field on this class.
  #
  # @param name identifier of the field
  # @param type type tag of the field
  # @param xpath XPath expression locating the field
  # @return [Array<Field>] the internal declaration list, including the new field
  def field(name, type, xpath)
    (@fields ||= []) << Field.new(name, type, xpath)
  end
end
|
23
|
+
|
10
24
|
# Parsed HTML document for #content, built on first use and then reused.
#
# The document is created with Nokogiri::HTML, applying the +noblanks+
# option through the configuration block.
def nokogiri
  return @nokogiri if @nokogiri

  @nokogiri = Nokogiri::HTML(content, &:noblanks)
end
|
@@ -22,6 +36,10 @@ module Aranha
|
|
22
36
|
# Node parser for this document, instantiated on first use from
# #node_parser_class with the field tuples returned by #fields.
def node_parser
  return @node_parser if @node_parser

  @node_parser = node_parser_class.new(fields)
end
|
39
|
+
|
40
|
+
# Field declarations of this parser's class, flattened to plain tuples.
#
# @return [Array<Array>] one [name, type, xpath] triple per declared field
def fields
  self.class.fields.map(&:to_a)
end
|
25
43
|
end
|
26
44
|
end
|
27
45
|
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'base'
|
4
|
+
|
5
|
+
module Aranha
  module Parsers
    module Html
      # HTML parser that extracts a single item from the document.
      #
      # Relies on #item_xpath, #nokogiri and #node_parser, which are not
      # defined in this class.
      class Item < Base
        # Data parsed from the item node, computed on first access.
        def data
          return @data if @data

          @data = node_parser.parse(item_node)
        end

        # Node the item is read from: the match for #item_xpath when one is
        # given, otherwise the whole document.
        #
        # @raise [RuntimeError] when nothing is found for the XPath
        def item_node
          return @item_node if @item_node

          found = item_xpath ? nokogiri.at_xpath(item_xpath) : nokogiri
          raise "Item node not found (Item xpath: #{item_xpath})" unless found

          @item_node = found
        end
      end
    end
  end
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require 'ostruct'
require 'yaml'
|
5
|
+
|
6
|
+
module Aranha
  module Spec
    # Lists pairs of source/target files in a directory.
    #
    # A fixture file is named "<basename>.source[.<ext>]" or
    # "<basename>.target[.<ext>]"; files sharing a basename form a pair.
    class SourceTargetFixtures
      class << self
        # Extracts the fixture basename from a file name or path.
        #
        # @param file [String] file name or path
        # @return [String, nil] the part before ".source"/".target", or nil
        #   when the name does not follow the fixture naming scheme
        def source_target_basename(file)
          m = /\A(.+)\.(?:source|target)(?:\..+)?\z/.match(File.basename(file))
          m ? m[1] : nil
        end
      end

      attr_reader :fixtures_directory

      # @param fixtures_directory [String] directory holding the fixture files
      def initialize(fixtures_directory)
        @fixtures_directory = fixtures_directory
      end

      # Builds one entry per fixture basename, in a deterministic order.
      #
      # @return [Array<OpenStruct>] objects responding to +source+ and
      #   +target+; each is an absolute path or nil when that side is missing
      def source_target_files
        sources_targets_basenames.map do |basename|
          OpenStruct.new(source: source_file(basename), target: target_file(basename))
        end
      end

      # @return [Array<String>] absolute paths of all source files found
      def source_files
        source_target_files.map(&:source).compact
      end

      private

      def target_file(basename)
        fixture_file(basename, 'target')
      end

      def source_file(basename)
        fixture_file(basename, 'source')
      end

      # Finds the file named "<basename>.<suffix>" with an optional extension.
      # The match follows the same rule as .source_target_basename, so a name
      # such as "x.sourcemap" is not mistaken for the source of "x".
      def fixture_file(basename, suffix)
        prefix = "#{basename}.#{suffix}"
        item = directory_items.find { |name| name == prefix || name.start_with?("#{prefix}.") }
        item && File.expand_path(item, fixtures_directory)
      end

      # Unique fixture basenames, in the order of the sorted directory listing.
      def sources_targets_basenames
        directory_items.map { |item| self.class.source_target_basename(item) }.compact.uniq
      end

      # Directory entries without "." and "..", sorted so results do not
      # depend on the order in which the filesystem lists them.
      def directory_items
        (Dir.entries(fixtures_directory) - %w[. ..]).sort
      end
    end
  end
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'source_target_fixtures'
|
4
|
+
|
5
|
+
# Shared examples checking +described_class+ against source/target fixtures.
#
# Fixtures live in "<spec file basename>_files" beside +spec_file+. When the
# WRITE_TARGET_FIXTURES environment variable is set, the examples write a
# "<basename>.target.yaml" next to every source file instead of comparing.
RSpec.shared_examples 'source_target_fixtures' do |spec_file| # rubocop:disable Metrics/BlockLength
  let(:spec_file) { spec_file }

  it 'fixtures directory should exist' do
    expect(::File.directory?(fixtures_dir)).to be true
  end

  context 'in fixtures directory' do
    it 'should have at least one file' do
      expect(source_target_fixtures.source_target_files.count).to be > 0
    end

    if ENV['WRITE_TARGET_FIXTURES']
      it 'should write target data for all files' do
        source_target_fixtures.source_files.each do |source_path|
          parsed = sort_results(source_data(source_path))
          name = ::Aranha::Spec::SourceTargetFixtures.source_target_basename(source_path)
          File.write(File.expand_path("../#{name}.target.yaml", source_path), parsed.to_yaml)
        end
      end
    else
      it 'should parse data for all files' do
        source_target_fixtures.source_target_files.each do |pair|
          assert_source_target_complete(pair)
          parsed = source_data(pair.source)
          expected = YAML.load_file(pair.target)
          expect(sort_results(parsed)).to eq(sort_results(expected))
        end
      end
    end
  end

  # Directory holding the fixtures for the spec file given to these examples.
  def fixtures_dir
    ::File.join(::File.dirname(spec_file), "#{::File.basename(spec_file, '.*')}_files")
  end

  # Fixture lister for #fixtures_dir, built once per example.
  def source_target_fixtures
    @source_target_fixtures ||= ::Aranha::Spec::SourceTargetFixtures.new(fixtures_dir)
  end

  # Data produced by the described class for one source file.
  def source_data(source_file)
    described_class.new(source_file).data
  end

  # Fails unless both sides of the fixture pair are present.
  def assert_source_target_complete(st)
    expect(st.source).to(be_truthy, "Source not found (Target: #{st.target})")
    expect(st.target).to(be_truthy, "Target not found (Source: #{st.source})")
  end

  # Applied to parsed and expected data before they are compared or written;
  # returns its argument unchanged here.
  def sort_results(r)
    r
  end
end
|
@@ -0,0 +1,685 @@
|
|
1
|
+
application/andrew-inset
|
2
|
+
application/applixware
|
3
|
+
application/atomcat+xml
|
4
|
+
application/atomsvc+xml
|
5
|
+
application/atom+xml
|
6
|
+
application/ccxml+xml
|
7
|
+
application/cdmi-capability
|
8
|
+
application/cdmi-container
|
9
|
+
application/cdmi-domain
|
10
|
+
application/cdmi-object
|
11
|
+
application/cdmi-queue
|
12
|
+
application/cu-seeme
|
13
|
+
application/davmount+xml
|
14
|
+
application/dssc+der
|
15
|
+
application/dssc+xml
|
16
|
+
application/ecmascript
|
17
|
+
application/emma+xml
|
18
|
+
application/epub+zip
|
19
|
+
application/exi
|
20
|
+
application/font-tdpfr
|
21
|
+
application/hyperstudio
|
22
|
+
application/ipfix
|
23
|
+
application/java-archive
|
24
|
+
application/javascript
|
25
|
+
application/java-serializd-object
|
26
|
+
application/java-vm
|
27
|
+
application/json
|
28
|
+
application/mac-binhex40
|
29
|
+
application/mac-compactpro
|
30
|
+
application/mads+xml
|
31
|
+
application/marc
|
32
|
+
application/marcxml+xml
|
33
|
+
application/mathematica
|
34
|
+
application/mathml+xml
|
35
|
+
application/mbox
|
36
|
+
application/mediaservercontrol+xml
|
37
|
+
application/metalink4+xml
|
38
|
+
application/mets+xml
|
39
|
+
application/mods+xml
|
40
|
+
application/mp21
|
41
|
+
application/mp4
|
42
|
+
application/msword
|
43
|
+
application/mxf
|
44
|
+
application/nd.uiq.theme
|
45
|
+
application/octet-stream
|
46
|
+
application/oda
|
47
|
+
application/oebps-package+xml
|
48
|
+
application/ogg
|
49
|
+
application/onenote
|
50
|
+
application/patch-ops-error+xml
|
51
|
+
application/pdf
|
52
|
+
application/pgp-encrypted
|
53
|
+
application/pgp-signature
|
54
|
+
application/pics-rules
|
55
|
+
application/pkcs10
|
56
|
+
application/pkcs7-mime
|
57
|
+
application/pkcs7-signature
|
58
|
+
application/pkcs8
|
59
|
+
application/pkix-attr-cert
|
60
|
+
application/pkix-cert
|
61
|
+
application/pkixcmp
|
62
|
+
application/pkix-crl
|
63
|
+
application/pkix-pkipath
|
64
|
+
application/pls+xml
|
65
|
+
application/postscript
|
66
|
+
application/prs.cww
|
67
|
+
application/pskc+xml
|
68
|
+
application/rdf+xml
|
69
|
+
application/reginfo+xml
|
70
|
+
application/relax-ng-compact-syntax
|
71
|
+
application/resource-lists-diff+xml
|
72
|
+
application/resource-lists+xml
|
73
|
+
application/rls-services+xml
|
74
|
+
application/rsd+xml
|
75
|
+
application/rss+xml
|
76
|
+
application/rtf
|
77
|
+
application/sbml+xml
|
78
|
+
application/scvp-cv-request
|
79
|
+
application/scvp-vp-request
|
80
|
+
application/scvp-vp-response
|
81
|
+
application/sdp
|
82
|
+
application/set-payment-initiation
|
83
|
+
application/set-reistration-initiation
|
84
|
+
application/shf+xml
|
85
|
+
application/smil+xml
|
86
|
+
application/sml+xml
|
87
|
+
application/sparql-query
|
88
|
+
application/sparql-results+xml
|
89
|
+
application/srgs
|
90
|
+
application/srgs+xml
|
91
|
+
application/sru+xml
|
92
|
+
application/svp-cv-response
|
93
|
+
application/tei+xml
|
94
|
+
application/thraud+xml
|
95
|
+
application/timestamped-data
|
96
|
+
application/vn.contact.cmsg
|
97
|
+
application/vn.crick.clicker.wordbank
|
98
|
+
application/vnd.3gp2.tcap
|
99
|
+
application/vnd.3gpp.pic-bw-large
|
100
|
+
application/vnd.3gpp.pic-bw-small
|
101
|
+
application/vnd.3gpp.pic-bw-var
|
102
|
+
application/vnd.3m.post-it-notes
|
103
|
+
application/vnd.accpac.simply.aso
|
104
|
+
application/vnd.accpac.simply.imp
|
105
|
+
application/vnd.acucobol
|
106
|
+
application/vnd.acucorp
|
107
|
+
application/vnd.adobe.air-application-installer-package+zip
|
108
|
+
application/vnd.adobe.fxp
|
109
|
+
application/vnd.adobe.xdp+xml
|
110
|
+
application/vnd.adobe.xfdf
|
111
|
+
application/vnd.ahead.space
|
112
|
+
application/vnd.airzip.filesecure.azf
|
113
|
+
application/vnd.airzip.filesecure.azs
|
114
|
+
application/vnd.amazon.ebook
|
115
|
+
application/vnd.americandynamics.acc
|
116
|
+
application/vnd.amiga.ami
|
117
|
+
application/vnd.android.package-archive
|
118
|
+
application/vnd.anser-web-certificate-issue-initiation
|
119
|
+
application/vnd.anser-web-funds-transfer-initiation
|
120
|
+
application/vnd.antix.game-component
|
121
|
+
application/vnd.apple.installe+xml
|
122
|
+
application/vnd.apple.mpegurl
|
123
|
+
application/vnd.aristanetworks.swi
|
124
|
+
application/vnd.asis.opendocument.text-template
|
125
|
+
application/vnd.audiograph
|
126
|
+
application/vnd.blueice.multipass
|
127
|
+
application/vnd.bm
|
128
|
+
application/vnd.businessobjects
|
129
|
+
application/vnd.chemdraw+xml
|
130
|
+
application/vnd.chipnuts.karaoke-mmd
|
131
|
+
application/vnd.cinderella
|
132
|
+
application/vnd.claymore
|
133
|
+
application/vnd.cloanto.rp9
|
134
|
+
application/vnd.clonk.c4group
|
135
|
+
application/vnd.cluetrust.cartomobile-config
|
136
|
+
application/vnd.cluetrust.cartomobile-config-pkg
|
137
|
+
application/vnd.commonspace
|
138
|
+
application/vnd.cosmocaller
|
139
|
+
application/vnd.crick.clicker
|
140
|
+
application/vnd.crick.clicker.keyboard
|
141
|
+
application/vnd.crick.clicker.palette
|
142
|
+
application/vnd.crick.clicker.template
|
143
|
+
application/vnd.criticaltools.wbs+xml
|
144
|
+
application/vnd.ctc-posml
|
145
|
+
application/vnd.cups-ppd
|
146
|
+
application/vnd.curl.car
|
147
|
+
application/vnd.curl.pcurl
|
148
|
+
application/vnd.data-vision.rdz
|
149
|
+
application/vnd.dolby.mlp
|
150
|
+
application/vnd.dpgraph
|
151
|
+
application/vnd.dreamfactory
|
152
|
+
application/vnd.dvb.ait
|
153
|
+
application/vnd.dvb.service
|
154
|
+
application/vnd.dynageo
|
155
|
+
application/vnd.ecowin.chart
|
156
|
+
application/vnd.enliven
|
157
|
+
application/vn.denovo.fcselayout-link
|
158
|
+
application/vnd.epson.esf
|
159
|
+
application/vnd.epson.msf
|
160
|
+
application/vnd.epson.quickanime
|
161
|
+
application/vnd.epson.salt
|
162
|
+
application/vnd.epson.ssf
|
163
|
+
application/vnd.eszigno3+xml
|
164
|
+
application/vnd.ezpix-album
|
165
|
+
application/vnd.ezpix-package
|
166
|
+
application/vnd.fdf
|
167
|
+
application/vnd.fdsn.seed
|
168
|
+
application/vnd.flographit
|
169
|
+
application/vnd.fluxtime.clip
|
170
|
+
application/vnd.framemaker
|
171
|
+
application/vnd.frogans.fnc
|
172
|
+
application/vnd.frogans.ltf
|
173
|
+
application/vnd.fsc.weblaunch
|
174
|
+
application/vnd.fujitsu.oasys2
|
175
|
+
application/vnd.fujitsu.oasys3
|
176
|
+
application/vnd.fujitsu.oasysgp
|
177
|
+
application/vnd.fujitsu.oasysprs
|
178
|
+
application/vnd.fujitu.oasys
|
179
|
+
application/vnd.fujixerox.ddd
|
180
|
+
application/vnd.fujixerox.docuworks
|
181
|
+
application/vnd.fujixerox.docuworks.binder
|
182
|
+
application/vnd.fuzzysheet
|
183
|
+
application/vnd.genomatix.tuxedo
|
184
|
+
application/vnd.geogebra.file
|
185
|
+
application/vnd.geogebra.tool
|
186
|
+
application/vnd.geometry-explorer
|
187
|
+
application/vnd.geonext
|
188
|
+
application/vnd.geoplan
|
189
|
+
application/vnd.geospace
|
190
|
+
application/vnd.gmx
|
191
|
+
application/vnd.google-earth.kml+xml
|
192
|
+
application/vnd.google-earth.kmz
|
193
|
+
application/vnd.grafeq
|
194
|
+
application/vnd.groove-account
|
195
|
+
application/vnd.groove-help
|
196
|
+
application/vnd.groove-identity-message
|
197
|
+
application/vnd.groove-injector
|
198
|
+
application/vnd.groove-tool-message
|
199
|
+
application/vnd.groove-tool-template
|
200
|
+
application/vnd.groove-vcar
|
201
|
+
application/vnd.hal+xml
|
202
|
+
application/vnd.handheld-entertainment+xml
|
203
|
+
application/vnd.hbci
|
204
|
+
application/vnd.hhe.lesson-player
|
205
|
+
application/vnd.hp-hpgl
|
206
|
+
application/vnd.hp-hpid
|
207
|
+
application/vnd.hp-hps
|
208
|
+
application/vnd.hp-jlyt
|
209
|
+
application/vnd.hp-pcl
|
210
|
+
application/vnd.hp-pclxl
|
211
|
+
application/vnd.hydrostatix.sof-data
|
212
|
+
application/vnd.hzn-3d-crossword
|
213
|
+
application/vnd.ibm.minipay
|
214
|
+
application/vnd.ibm.modcap
|
215
|
+
application/vnd.ibm.rights-management
|
216
|
+
application/vnd.ibm.securecontainer
|
217
|
+
application/vnd.iccprofile
|
218
|
+
application/vnd.igloader
|
219
|
+
application/vnd.immervision-ivp
|
220
|
+
application/vnd.immervision-ivu
|
221
|
+
application/vnd.insors.igm
|
222
|
+
application/vnd.intercon.formnet
|
223
|
+
application/vnd.intergeo
|
224
|
+
application/vnd.intu.qbo
|
225
|
+
application/vnd.intu.qfx
|
226
|
+
application/vnd.ipunplugged.rcprofile
|
227
|
+
application/vnd.irepository.package+xml
|
228
|
+
application/vnd.isac.fcs
|
229
|
+
application/vnd.is-xpr
|
230
|
+
application/vnd.jam
|
231
|
+
application/vnd.jcp.javame.midlet-rms
|
232
|
+
application/vnd.jisp
|
233
|
+
application/vnd.joost.joda-archive
|
234
|
+
application/vnd.kahootz
|
235
|
+
application/vnd.kde.karbon
|
236
|
+
application/vnd.kde.kchart
|
237
|
+
application/vnd.kde.kformula
|
238
|
+
application/vnd.kde.kivio
|
239
|
+
application/vnd.kde.kontour
|
240
|
+
application/vnd.kde.kpresenter
|
241
|
+
application/vnd.kde.kspread
|
242
|
+
application/vnd.kde.kword
|
243
|
+
application/vnd.kenameaapp
|
244
|
+
application/vnd.kidspiration
|
245
|
+
application/vnd.kinar
|
246
|
+
application/vnd.koan
|
247
|
+
application/vnd.kodak-descriptor
|
248
|
+
application/vnd.las.las+xml
|
249
|
+
application/vnd.llamagraphics.life-balance.desktop
|
250
|
+
application/vnd.llamagraphics.life-balance.exchange+xml
|
251
|
+
application/vnd.lotus-1-2-3
|
252
|
+
application/vnd.lotus-approach
|
253
|
+
application/vnd.lotus-freelance
|
254
|
+
application/vnd.lotus-notes
|
255
|
+
application/vnd.lotus-organizer
|
256
|
+
application/vnd.lotus-screencam
|
257
|
+
application/vnd.lotus-wordro
|
258
|
+
application/vnd.macports.portpkg
|
259
|
+
application/vnd.mcd
|
260
|
+
application/vnd.medcalcdata
|
261
|
+
application/vnd.mediastation.cdkey
|
262
|
+
application/vnd.mfer
|
263
|
+
application/vnd.mfmp
|
264
|
+
application/vnd.micrografx.flo
|
265
|
+
application/vnd.micrografx.igx
|
266
|
+
application/vnd.mif
|
267
|
+
application/vnd.mobius.daf
|
268
|
+
application/vnd.mobius.dis
|
269
|
+
application/vnd.mobius.mbk
|
270
|
+
application/vnd.mobius.mqy
|
271
|
+
application/vnd.mobius.msl
|
272
|
+
application/vnd.mobius.plc
|
273
|
+
application/vnd.mobius.txf
|
274
|
+
application/vnd.mophun.application
|
275
|
+
application/vnd.mophun.certificate
|
276
|
+
application/vnd.mozilla.xul+xml
|
277
|
+
application/vnd.ms-artgalry
|
278
|
+
application/vnd.ms-ca-compressed
|
279
|
+
application/vnd.mseq
|
280
|
+
application/vnd.ms-excel
|
281
|
+
application/vnd.ms-excel.addin.macroenabled.12
|
282
|
+
application/vnd.ms-excelsheet.binary.macroenabled.12
|
283
|
+
application/vnd.ms-excel.sheet.macroenabled.12
|
284
|
+
application/vnd.ms-excel.template.macroenabled.12
|
285
|
+
application/vnd.ms-fontobject
|
286
|
+
application/vnd.ms-htmlhelp
|
287
|
+
application/vnd.msician
|
288
|
+
application/vnd.ms-ims
|
289
|
+
application/vnd.ms-lrm
|
290
|
+
application/vnd.ms-officetheme
|
291
|
+
application/vnd.ms-pki.seccat
|
292
|
+
application/vnd.ms-pki.stl
|
293
|
+
application/vnd.ms-powerpoint
|
294
|
+
application/vnd.ms-powerpoint.addin.macroenabled.12
|
295
|
+
application/vnd.ms-powerpoint.presentation.macroenabled.12
|
296
|
+
application/vnd.ms-powerpoint.slide.macroenabled.12
|
297
|
+
application/vnd.ms-powerpoint.slideshow.macroenabled.12
|
298
|
+
application/vnd.ms-powerpoint.template.macroenabled.12
|
299
|
+
application/vnd.ms-project
|
300
|
+
application/vnd.ms-word.document.macroenabled.12
|
301
|
+
application/vnd.ms-word.template.macroenabed.12
|
302
|
+
application/vnd.ms-works
|
303
|
+
application/vnd.ms-wpl
|
304
|
+
application/vnd.ms-xpsdocument
|
305
|
+
application/vnd.muvee.style
|
306
|
+
application/vnd.na
|
307
|
+
application/vnd.neurolanguage.nlu
|
308
|
+
application/vnd.noblenet-directory
|
309
|
+
application/vnd.noblenet-sealer
|
310
|
+
application/vnd.noblenet-web
|
311
|
+
application/vnd.nokia.n-gage.data
|
312
|
+
application/vnd.nokia.n-gage.symbian.install
|
313
|
+
application/vnd.nokia.radio-preset
|
314
|
+
application/vnd.nokia.radio-presets
|
315
|
+
application/vnd.novadigm.edm
|
316
|
+
application/vnd.novadigm.ext
|
317
|
+
application/vnd.novadim.edx
|
318
|
+
application/vnd.oasis.opendocumen.presentation-template
|
319
|
+
application/vnd.oasis.opendocument.char
|
320
|
+
application/vnd.oasis.opendocument.chart-template
|
321
|
+
application/vnd.oasis.opendocument.database
|
322
|
+
application/vnd.oasis.opendocument.formula
|
323
|
+
application/vnd.oasis.opendocument.formula-template
|
324
|
+
application/vnd.oasis.opendocument.graphics-template
|
325
|
+
application/vnd.oasis.opendocument.grapics
|
326
|
+
application/vnd.oasis.opendocument.image
|
327
|
+
application/vnd.oasis.opendocument.image-template
|
328
|
+
application/vnd.oasis.opendocument.presentation
|
329
|
+
application/vnd.oasis.opendocument.spreadsheet
|
330
|
+
application/vnd.oasis.opendocument.spreadsheet-template
|
331
|
+
application/vnd.oasis.opendocument.text
|
332
|
+
application/vnd.oasis.opendocument.text-master
|
333
|
+
application/vnd.oasis.opendocument.text-web
|
334
|
+
application/vnd.olpc-sugar
|
335
|
+
application/vnd.oma.dd2+xml
|
336
|
+
application/vnd.openofficeorg.extension
|
337
|
+
application/vnd.openxmformats-officedocument.wordprocessingml.document
|
338
|
+
application/vnd.openxmlformats-officedocument.presentationml.presentation
|
339
|
+
application/vnd.openxmlformats-officedocument.presentationml.slide
|
340
|
+
application/vnd.openxmlformats-officedocument.presentationml.slideshw
|
341
|
+
application/vnd.openxmlformats-officedocument.presentationml.template
|
342
|
+
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
|
343
|
+
application/vnd.openxmlformats-officedocument.spreadsheetml.template
|
344
|
+
application/vnd.openxmlformats-officedocument.wordprocessingml.template
|
345
|
+
application/vnd.osgeo.mapguide.package
|
346
|
+
application/vnd.osgi.dp
|
347
|
+
application/vnd.palm
|
348
|
+
application/vnd.pawaafile
|
349
|
+
application/vnd.pg.format
|
350
|
+
application/vnd.pg.osasli
|
351
|
+
application/vnd.picsel
|
352
|
+
application/vnd.pmi.widget
|
353
|
+
application/vnd.pocketlearn
|
354
|
+
application/vnd.powerbuilder6
|
355
|
+
application/vnd.previewsystems.box
|
356
|
+
application/vnd.proteus.magazine
|
357
|
+
application/vnd.publishare-delta-tree
|
358
|
+
application/vnd.pvi.ptid1
|
359
|
+
application/vnd.quark.quarkxpress
|
360
|
+
application/vnd.realvnc.bed
|
361
|
+
application/vnd.recordare.musicxml
|
362
|
+
application/vnd.recordare.musicxml+xml
|
363
|
+
application/vnd.rig.cryptonote
|
364
|
+
application/vnd.rim.cod
|
365
|
+
application/vnd.rn-realmedia
|
366
|
+
application/vnd.route66.link66+xml
|
367
|
+
application/vnd.sailingtracker.track
|
368
|
+
application/vnd.seemail
|
369
|
+
application/vnd.sema
|
370
|
+
application/vnd.semd
|
371
|
+
application/vnd.semf
|
372
|
+
application/vnd.shana.informed.formdata
|
373
|
+
application/vnd.shana.informed.formtemplate
|
374
|
+
application/vnd.shana.informed.interchange
|
375
|
+
application/vnd.shana.informed.package
|
376
|
+
application/vnd.simtech-mindmapper
|
377
|
+
application/vnd.smaf
|
378
|
+
application/vnd.smart.teacher
|
379
|
+
application/vnd.solent.sdkm+xml
|
380
|
+
application/vnd.spotfire.dxp
|
381
|
+
application/vnd.spotfire.sfs
|
382
|
+
application/vnd.stardivision.calc
|
383
|
+
application/vnd.stardivision.draw
|
384
|
+
application/vnd.stardivision.impress
|
385
|
+
application/vnd.stardivision.math
|
386
|
+
application/vnd.stardivision.writer
|
387
|
+
application/vnd.stepmania.stepchart
|
388
|
+
application/vnd.sun.xl.impress.template
|
389
|
+
application/vnd.sun.xml.calc
|
390
|
+
application/vnd.sun.xml.calc.template
|
391
|
+
application/vnd.sun.xml.draw
|
392
|
+
application/vnd.sun.xml.draw.template
|
393
|
+
application/vnd.sun.xml.impress
|
394
|
+
application/vnd.sun.xml.math
|
395
|
+
application/vnd.sun.xml.writer
|
396
|
+
application/vnd.sun.xml.writer.global
|
397
|
+
application/vnd.sun.xml.writer.template
|
398
|
+
application/vnd.sus-calendar
|
399
|
+
application/vnd.svd
|
400
|
+
application/vnd.symbian.install
|
401
|
+
application/vnd.syncml.dm+wbxml
|
402
|
+
application/vnd.syncml.dm+xml
|
403
|
+
application/vnd.syncml+xml
|
404
|
+
application/vnd.tardivision.writer-global
|
405
|
+
application/vnd.tmobile-ivetv
|
406
|
+
application/vnd.to.intent-module-archive
|
407
|
+
application/vnd.trid.tpt
|
408
|
+
application/vnd.triscape.mxs
|
409
|
+
application/vnd.trueapp
|
410
|
+
application/vnd.ufdl
|
411
|
+
application/vnd.ul
|
412
|
+
application/vnd.umajin
|
413
|
+
application/vnd.unity
|
414
|
+
application/vnd.uoml+xml
|
415
|
+
application/vnd.vcx
|
416
|
+
application/vnd.visionary
|
417
|
+
application/vnd.vsf
|
418
|
+
application/vnd.wap.wbxml
|
419
|
+
application/vnd.wap.wmlc
|
420
|
+
application/vnd.wap.wmlscriptc
|
421
|
+
application/vnd.webturb
|
422
|
+
application/vnd.wolfrm.player
|
423
|
+
application/vnd.wordperfect
|
424
|
+
application/vnd.wqd
|
425
|
+
application/vnd.wt.stf
|
426
|
+
application/vnd.xara
|
427
|
+
application/vnd.xfdl
|
428
|
+
application/vnd.yamaha.h-script
|
429
|
+
application/vnd.yamaha.hv-dic
|
430
|
+
application/vnd.yamaha.hv-voice
|
431
|
+
application/vnd.yamaha.openscoreformat
|
432
|
+
application/vnd.yamaha.openscoreformat.osfpvg+xml
|
433
|
+
application/vnd.yamaha.smaf-phrase
|
434
|
+
application/vnd.yellowriver-custom-menu
|
435
|
+
application/vnd.ymaha.smaf-audio
|
436
|
+
application/vnd.zzazz.deck+xml
|
437
|
+
application/vn.visio
|
438
|
+
application/voicexml+xml
|
439
|
+
application/widget
|
440
|
+
application/winhlp
|
441
|
+
application/wsdl+xml
|
442
|
+
application/wspolicy+xml
|
443
|
+
application/x-7z-compressed
|
444
|
+
application/x-abiword
|
445
|
+
application/x-ace-compressed
|
446
|
+
application/x-athorware-map
|
447
|
+
application/x-authorware-bin
|
448
|
+
application/x-authorware-seg
|
449
|
+
application/x-bcpio
|
450
|
+
application/x-bittorrent
|
451
|
+
application/x-bzip
|
452
|
+
application/x-bzip2
|
453
|
+
application/xcap-diff+xml
|
454
|
+
application/x-cdlink
|
455
|
+
application/x-chat
|
456
|
+
application/x-chess-pgn
|
457
|
+
application/x-cpio
|
458
|
+
application/x-csh
|
459
|
+
application/x-debian-package
|
460
|
+
application/x-director
|
461
|
+
application/x-doom
|
462
|
+
application/x-dtbncx+xml
|
463
|
+
application/x-dtbook+xml
|
464
|
+
application/x-dtbresource+xml
|
465
|
+
application/x-dvi
|
466
|
+
application/xenc+xml
|
467
|
+
application/x-font-bdf
|
468
|
+
application/x-font-ghostscript
|
469
|
+
application/x-font-linux-psf
|
470
|
+
application/x-font-otf
|
471
|
+
application/x-font-pcf
|
472
|
+
application/x-font-snf
|
473
|
+
application/x-font-ttf
|
474
|
+
application/xfont-type1
|
475
|
+
application/x-font-woff
|
476
|
+
application/x-futuresplash
|
477
|
+
application/x-gnumeric
|
478
|
+
application/x-gtar
|
479
|
+
application/x-hdf
|
480
|
+
application/xhtml+xml
|
481
|
+
application/x-java-jnlp-file
|
482
|
+
application/x-latex
|
483
|
+
application/xml
|
484
|
+
application/xml-dtd
|
485
|
+
application/x-mobipocket-ebook
|
486
|
+
application/x-msaccess
|
487
|
+
application/x-ms-application
|
488
|
+
application/x-msbinder
|
489
|
+
application/x-mscardfile
|
490
|
+
application/x-msclip
|
491
|
+
application/x-msdownload
|
492
|
+
application/x-msmediaview
|
493
|
+
application/x-msmetafile
|
494
|
+
application/x-msmoney
|
495
|
+
application/x-mspublisher
|
496
|
+
application/x-msschedule
|
497
|
+
application/x-msterminal
|
498
|
+
application/x-ms-wmd
|
499
|
+
application/x-ms-wmz
|
500
|
+
application/x-mswrite
|
501
|
+
application/x-ms-xbap
|
502
|
+
application/x-netcdf
|
503
|
+
application/xop+xml
|
504
|
+
application/x-pkcs12
|
505
|
+
application/x-pkcs7-certificates
|
506
|
+
application/x-pkcs7-certreqresp
|
507
|
+
application/x-rar-compressed
|
508
|
+
application/x-sh
|
509
|
+
application/x-shar
|
510
|
+
application/x-shockwave-flash
|
511
|
+
application/x-silverlight-app
|
512
|
+
application/xslt+xml
|
513
|
+
application/xspf+xml
|
514
|
+
application/x-stuffit
|
515
|
+
application/x-stuffitx
|
516
|
+
application/x-sv4cpio
|
517
|
+
application/x-sv4crc
|
518
|
+
application/x-tar
|
519
|
+
application/x-tcl
|
520
|
+
application/x-tex
|
521
|
+
application/x-texinfo
|
522
|
+
application/x-tex-tfm
|
523
|
+
application/x-ustar
|
524
|
+
application/xv+xml
|
525
|
+
application/x-wais-source
|
526
|
+
application/x-x509-ca-cert
|
527
|
+
application/x-xfig
|
528
|
+
application/x-xpinstall
|
529
|
+
application/yang
|
530
|
+
application/yin+xml
|
531
|
+
application/zip
|
532
|
+
audio/adpcm
|
533
|
+
audio/basic
|
534
|
+
audio/midi
|
535
|
+
audio/mp4
|
536
|
+
audio/mpeg
|
537
|
+
audio/ogg
|
538
|
+
audio/vnd.dece.audio
|
539
|
+
audio/vnd.digital-winds
|
540
|
+
audio/vnd.dra
|
541
|
+
audio/vnd.dts
|
542
|
+
audio/vnd.dts.hd
|
543
|
+
audio/vnd.lucent.voice
|
544
|
+
audio/vnd.ms-playready.media.pya
|
545
|
+
audio/vnd.nuera.ecelp4800
|
546
|
+
audio/vnd.nuera.ecelp7470
|
547
|
+
audio/vnd.nuera.ecelp9600
|
548
|
+
audio/vnd.rip
|
549
|
+
audio/webm
|
550
|
+
audio/x-aac
|
551
|
+
audio/x-aiff
|
552
|
+
audio/x-mpegurl
|
553
|
+
audio/x-ms-wax
|
554
|
+
audio/x-ms-wma
|
555
|
+
audio/x-pn-realaudio
|
556
|
+
audio/x-pn-realaudio-plugin
|
557
|
+
audio/x-wav
|
558
|
+
chemical/x-cdx
|
559
|
+
chemical/x-cif
|
560
|
+
chemical/x-cmdf
|
561
|
+
chemical/x-cml
|
562
|
+
chemical/x-csml
|
563
|
+
chemical/x-xyz
|
564
|
+
image/bmp
|
565
|
+
image/cgm
|
566
|
+
image/g3fax
|
567
|
+
image/gif
|
568
|
+
image/ief
|
569
|
+
image/jpeg
|
570
|
+
image/ktx
|
571
|
+
image/png
|
572
|
+
image/x-portable-bitmap
|
573
|
+
image/prs.btif
|
574
|
+
image/svg+xml
|
575
|
+
image/tiff
|
576
|
+
image/vnd.adobe.photoshop
|
577
|
+
image/vnd.dece.graphic
|
578
|
+
image/vnd.djvu
|
579
|
+
image/vnd.dvb.subtitle
|
580
|
+
image/vnd.dxf
|
581
|
+
image/vnd.fastbidsheet
|
582
|
+
image/vnd.fpx
|
583
|
+
image/vnd.fst
|
584
|
+
image/vnd.fujixerox.edmics-mmr
|
585
|
+
image/vnd.fujixerox.edmics-rlc
|
586
|
+
image/vnd.ms-modi
|
587
|
+
image/vnd.net-fpx
|
588
|
+
image/vnd.wap.wbmp
|
589
|
+
image/vnd.xiff
|
590
|
+
image/webp
|
591
|
+
image/x-cmu-raster
|
592
|
+
image/x-cmx
|
593
|
+
image/x-freehand
|
594
|
+
image/x-icon
|
595
|
+
image/x-pcx
|
596
|
+
image/x-pict
|
597
|
+
image/x-portable-anymap
|
598
|
+
image/x-portable-graymap
|
599
|
+
image/x-portable-pixmap
|
600
|
+
image/x-rgb
|
601
|
+
image/x-xbitmap
|
602
|
+
image/x-xpixmap
|
603
|
+
image/x-xwindowump
|
604
|
+
image/vnd.dwg
|
605
|
+
message/rfc822
|
606
|
+
model/iges
|
607
|
+
model/mesh
|
608
|
+
model/vnd.collada+xml
|
609
|
+
model/vnd.dwf
|
610
|
+
model/vnd.gdl
|
611
|
+
model/vnd.mts
|
612
|
+
model/vnd.vtu
|
613
|
+
model/vn.gtw
|
614
|
+
model/vrml
|
615
|
+
text/calendar
|
616
|
+
text/css
|
617
|
+
text/csv
|
618
|
+
text/html
|
619
|
+
text/n3
|
620
|
+
text/plain
|
621
|
+
text/plain-bas
|
622
|
+
text/prs.lines.tag
|
623
|
+
text/richtext
|
624
|
+
text/sgml
|
625
|
+
text/tab-separated-values
|
626
|
+
text/troff
|
627
|
+
text/turtle
|
628
|
+
text/uri-list
|
629
|
+
text/vnd.crl.scurl
|
630
|
+
text/vnd.curl
|
631
|
+
text/vnd.curl.dcurl
|
632
|
+
text/vnd.curl.mcurl
|
633
|
+
text/vnd.fly
|
634
|
+
text/vnd.fmi.flexstor
|
635
|
+
text/vnd.graphviz
|
636
|
+
text/vnd.in3d.3dml
|
637
|
+
text/vnd.in3d.spot
|
638
|
+
text/vnd.sun.j2me.app-descriptor
|
639
|
+
text/vnd.wap.wml
|
640
|
+
text/vnd.wap.wmlscript
|
641
|
+
text/x-asm
|
642
|
+
text/x-c
|
643
|
+
text/x-fortran
|
644
|
+
text/x-java-source
|
645
|
+
text/x-pascal
|
646
|
+
text/x-setext
|
647
|
+
text/x-uuencode
|
648
|
+
text/x-vcalendar
|
649
|
+
text/x-vcard
|
650
|
+
text/yaml
|
651
|
+
video/3gpp
|
652
|
+
video/3gpp2
|
653
|
+
video/h261
|
654
|
+
video/h263
|
655
|
+
video/h264
|
656
|
+
video/jpeg
|
657
|
+
video/jpm
|
658
|
+
video/mj2
|
659
|
+
video/mp4
|
660
|
+
video/mpeg
|
661
|
+
video/ogg
|
662
|
+
video/quicktime
|
663
|
+
video/vnd.dece.mobile
|
664
|
+
video/vnd.dece.pd
|
665
|
+
video/vnd.dece.sd
|
666
|
+
video/vnd.dece.video
|
667
|
+
video/vnd.dec.hd
|
668
|
+
video/vnd.fvt
|
669
|
+
video/vnd.mpegurl
|
670
|
+
video/vnd.ms-playready.media.pyv
|
671
|
+
video/vnd.uvvu.mp4
|
672
|
+
video/vnd.vivo
|
673
|
+
video/webm
|
674
|
+
video/x-f4v
|
675
|
+
video/x-fli
|
676
|
+
video/x-flv
|
677
|
+
video/x-m4v
|
678
|
+
video/x-ms-asf
|
679
|
+
video/x-msvideo
|
680
|
+
video/x-ms-wm
|
681
|
+
video/x-ms-wmv
|
682
|
+
video/x-ms-wmx
|
683
|
+
video/x-ms-wvx
|
684
|
+
video/x-sgi-movie
|
685
|
+
x-conference/x-cooltalk
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'selenium-webdriver'
|
4
|
+
|
5
|
+
module Aranha
  module Selenium
    # Builds the Selenium WebDriver instances used by Aranha.
    #
    # Only a Firefox driver is produced. Its profile is configured so that
    # responses whose MIME type appears in the bundled
    # +auto_download_mime_types+ list are saved to a download directory
    # without asking the user.
    class DriverFactory
      class << self
        # Download directory used when the caller does not supply one.
        DEFAULT_DOWNLOAD_DIR = '/tmp/aranha_download_dir'

        # Creates a Firefox WebDriver.
        #
        # @param options [Hash] supported key: +download_dir+ (String or
        #   Symbol key), the directory Firefox saves downloads into.
        #   Defaults to DEFAULT_DOWNLOAD_DIR.
        # @return [Object] whatever +::Selenium::WebDriver.for+ returns.
        def create_driver(options = {})
          options = options.with_indifferent_access
          options[:download_dir] ||= DEFAULT_DOWNLOAD_DIR
          create_firefox_driver(options)
        end

        private

        # Instantiates the Firefox driver with the download-enabled profile.
        def create_firefox_driver(options)
          ::Selenium::WebDriver.for(
            :firefox,
            options: ::Selenium::WebDriver::Firefox::Options.new(
              profile: create_firefox_profile(options)
            )
          )
        end

        # Builds the Firefox profile carrying the download preferences.
        def create_firefox_profile(options)
          profile = ::Selenium::WebDriver::Firefox::Profile.new
          profile['browser.download.dir'] = options[:download_dir]
          # 2 makes Firefox use the custom directory set in
          # 'browser.download.dir' (per Firefox preference documentation).
          profile['browser.download.folderList'] = 2
          profile['browser.helperApps.neverAsk.saveToDisk'] = auto_download_mime_types.join(';')
          # Turn off the built-in PDF viewer preference.
          profile['pdfjs.disabled'] = true
          profile
        end

        # MIME types read from the +auto_download_mime_types+ file that sits
        # next to this source file, one type per line.
        #
        # The list is read once and cached; blank lines are skipped so the
        # joined preference value never contains empty entries.
        #
        # @return [Array<String>]
        def auto_download_mime_types
          @auto_download_mime_types ||=
            ::File.readlines(::File.join(__dir__, 'auto_download_mime_types'))
                  .map(&:strip)
                  .reject(&:empty?)
                  .freeze
        end
      end
    end
  end
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'delegate'
require 'tmpdir'
require 'aranha/selenium/driver_factory'
|
4
|
+
|
5
|
+
module Aranha
  module Selenium
    # A browser session: wraps the driver created by DriverFactory and adds
    # helpers for waiting on elements, clicks and downloads.
    #
    # Any method not defined here is forwarded to the wrapped driver through
    # SimpleDelegator.
    class Session < ::SimpleDelegator
      # Default number of seconds the shared waiter keeps retrying.
      DEFAULT_WAIT_TIMEOUT = 15

      # @return [Downloads] tracker of this session's download directory
      # @return [Object] the Selenium waiter shared by the +wait_for_*+ helpers
      attr_reader :downloads, :wait

      # @param wait_timeout [Numeric] seconds passed as +timeout+ to
      #   +::Selenium::WebDriver::Wait+.
      def initialize(wait_timeout: DEFAULT_WAIT_TIMEOUT)
        @downloads = Downloads.new
        @wait = ::Selenium::WebDriver::Wait.new(timeout: wait_timeout)
        super(::Aranha::Selenium::DriverFactory.create_driver(download_dir: @downloads.dir))
      end

      # Looks an element up without failing when it is absent.
      #
      # @param find_element_args [Hash] locator forwarded to +find_elements+
      # @return [Object, nil] the first match, or nil when nothing matches
      def find_or_not_element(find_element_args)
        find_elements(find_element_args).first
      end

      # Retries until the located element has been clicked.
      #
      # @param find_element_args [Hash] locator forwarded to +find_element+
      # @return [Object] the clicked element
      def wait_for_click(find_element_args)
        wait.until do
          element = find_element(find_element_args)
          # element_click returns nil while the element cannot be clicked,
          # which makes the waiter try again.
          element_click(element) if element
        end
      end

      # Retries until the element can be located.
      #
      # @param find_element_args [Hash] locator forwarded to +find_element+
      # @return [Object] the located element
      def wait_for_element(find_element_args)
        wait.until { find_element(find_element_args) }
      end

      # Runs the given block (which should trigger a download) and waits for
      # a new file to show up in the download directory.
      #
      # The wait only ends once none of the new files is a partial download,
      # so the returned path is not a file that is still being written.
      #
      # @yield the action that starts the download
      # @return [String] path of the first new file
      def wait_for_download
        initial_downloads = downloads.current
        yield
        new_downloads = []
        wait.until do
          new_downloads = downloads.current - initial_downloads
          new_downloads.any? && new_downloads.none? { |path| downloads.partial?(path) }
        end
        new_downloads.first
      end

      # Serializes the current document from the root element's +innerHTML+.
      #
      # @return [String] the inner HTML wrapped in an +<html>+ element
      def current_source
        element = find_element(xpath: '/html[1]')
        # NOTE(review): Selenium's find_element is documented to raise when
        # nothing matches, so this guard is probably unreachable; it is kept
        # so the error behaviour stays unchanged.
        raise 'Root element not found' unless element

        s = element.attribute('innerHTML')
        "<html>\n#{s}\n</html>\n"
      end

      # Tracks the temporary directory the browser downloads into.
      class Downloads
        # Suffix Firefox is expected to give a file while it is still
        # downloading.
        PARTIAL_SUFFIX = '.part'

        # @return [String] path of the download directory
        attr_reader :dir

        # Creates a fresh temporary directory for this session's downloads.
        def initialize
          @dir = ::Dir.mktmpdir
        end

        # @return [Array<String>] every entry currently under +dir+,
        #   searched recursively
        def current
          ::Dir.glob("#{dir}/**/*")
        end

        # @param path [String] a path as returned by +current+
        # @return [Boolean] whether +path+ names an unfinished download
        def partial?(path)
          path.to_s.end_with?(PARTIAL_SUFFIX)
        end
      end

      private

      # Clicks +element+.
      #
      # @return [Object, nil] the element, or nil when the click was
      #   intercepted or the element is not interactable yet
      def element_click(element)
        element.click
        element
      rescue ::Selenium::WebDriver::Error::ElementClickInterceptedError,
             ::Selenium::WebDriver::Error::ElementNotInteractableError
        nil
      end
    end
  end
end
|
data/lib/aranha/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: aranha
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eduardo H. Bogoni
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-07-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: active_scaffold
|
@@ -66,6 +66,26 @@ dependencies:
|
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: 4.2.10
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: selenium-webdriver
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '3.142'
|
76
|
+
- - ">="
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
version: 3.142.3
|
79
|
+
type: :runtime
|
80
|
+
prerelease: false
|
81
|
+
version_requirements: !ruby/object:Gem::Requirement
|
82
|
+
requirements:
|
83
|
+
- - "~>"
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '3.142'
|
86
|
+
- - ">="
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: 3.142.3
|
69
89
|
- !ruby/object:Gem::Dependency
|
70
90
|
name: sqlite3
|
71
91
|
requirement: !ruby/object:Gem::Requirement
|
@@ -110,11 +130,17 @@ files:
|
|
110
130
|
- lib/aranha/fixtures/download.rb
|
111
131
|
- lib/aranha/parsers/base.rb
|
112
132
|
- lib/aranha/parsers/html/base.rb
|
133
|
+
- lib/aranha/parsers/html/item.rb
|
113
134
|
- lib/aranha/parsers/html/item_list.rb
|
114
135
|
- lib/aranha/parsers/html/node/base.rb
|
115
136
|
- lib/aranha/parsers/html/node/default.rb
|
116
137
|
- lib/aranha/parsers/invalid_state_exception.rb
|
138
|
+
- lib/aranha/parsers/spec/source_target_fixtures.rb
|
139
|
+
- lib/aranha/parsers/spec/source_target_fixtures_example.rb
|
117
140
|
- lib/aranha/processor.rb
|
141
|
+
- lib/aranha/selenium/auto_download_mime_types
|
142
|
+
- lib/aranha/selenium/driver_factory.rb
|
143
|
+
- lib/aranha/selenium/session.rb
|
118
144
|
- lib/aranha/version.rb
|
119
145
|
- lib/tasks/aranha_tasks.rake
|
120
146
|
- test/aranha_test.rb
|
@@ -181,42 +207,42 @@ signing_key:
|
|
181
207
|
specification_version: 4
|
182
208
|
summary: Rails utilities for web crawling.
|
183
209
|
test_files:
|
184
|
-
- test/
|
210
|
+
- test/dummy/Rakefile
|
211
|
+
- test/dummy/README.rdoc
|
212
|
+
- test/dummy/config.ru
|
213
|
+
- test/dummy/config/boot.rb
|
214
|
+
- test/dummy/config/database.yml
|
215
|
+
- test/dummy/config/secrets.yml
|
216
|
+
- test/dummy/config/locales/en.yml
|
217
|
+
- test/dummy/config/application.rb
|
218
|
+
- test/dummy/config/environments/development.rb
|
219
|
+
- test/dummy/config/environments/test.rb
|
220
|
+
- test/dummy/config/environments/production.rb
|
221
|
+
- test/dummy/config/environment.rb
|
185
222
|
- test/dummy/config/routes.rb
|
186
223
|
- test/dummy/config/initializers/assets.rb
|
187
224
|
- test/dummy/config/initializers/cookies_serializer.rb
|
188
|
-
- test/dummy/config/initializers/
|
225
|
+
- test/dummy/config/initializers/inflections.rb
|
189
226
|
- test/dummy/config/initializers/session_store.rb
|
190
|
-
- test/dummy/config/initializers/backtrace_silencers.rb
|
191
227
|
- test/dummy/config/initializers/wrap_parameters.rb
|
228
|
+
- test/dummy/config/initializers/to_time_preserves_timezone.rb
|
192
229
|
- test/dummy/config/initializers/filter_parameter_logging.rb
|
193
|
-
- test/dummy/config/initializers/
|
230
|
+
- test/dummy/config/initializers/backtrace_silencers.rb
|
194
231
|
- test/dummy/config/initializers/mime_types.rb
|
195
|
-
- test/dummy/
|
196
|
-
- test/dummy/config/secrets.yml
|
197
|
-
- test/dummy/config/locales/en.yml
|
198
|
-
- test/dummy/config/environment.rb
|
199
|
-
- test/dummy/config/boot.rb
|
200
|
-
- test/dummy/config/application.rb
|
201
|
-
- test/dummy/config/environments/production.rb
|
202
|
-
- test/dummy/config/environments/test.rb
|
203
|
-
- test/dummy/config/environments/development.rb
|
204
|
-
- test/dummy/Rakefile
|
205
|
-
- test/dummy/public/favicon.ico
|
206
|
-
- test/dummy/public/404.html
|
207
|
-
- test/dummy/public/500.html
|
208
|
-
- test/dummy/public/422.html
|
209
|
-
- test/dummy/config.ru
|
210
|
-
- test/dummy/app/assets/stylesheets/application.css
|
211
|
-
- test/dummy/app/assets/javascripts/application.js
|
212
|
-
- test/dummy/app/helpers/application_helper.rb
|
232
|
+
- test/dummy/db/schema.rb
|
213
233
|
- test/dummy/app/views/layouts/application.html.erb
|
214
234
|
- test/dummy/app/controllers/application_controller.rb
|
215
|
-
- test/dummy/
|
235
|
+
- test/dummy/app/helpers/application_helper.rb
|
236
|
+
- test/dummy/app/assets/stylesheets/application.css
|
237
|
+
- test/dummy/app/assets/javascripts/application.js
|
238
|
+
- test/dummy/public/422.html
|
239
|
+
- test/dummy/public/404.html
|
240
|
+
- test/dummy/public/favicon.ico
|
241
|
+
- test/dummy/public/500.html
|
216
242
|
- test/dummy/bin/bundle
|
243
|
+
- test/dummy/bin/setup
|
217
244
|
- test/dummy/bin/rails
|
218
245
|
- test/dummy/bin/rake
|
219
|
-
- test/
|
220
|
-
- test/dummy/README.rdoc
|
221
|
-
- test/integration/navigation_test.rb
|
246
|
+
- test/aranha_test.rb
|
222
247
|
- test/test_helper.rb
|
248
|
+
- test/integration/navigation_test.rb
|