zorki 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +1 -0
- data/Gemfile.lock +76 -40
- data/lib/zorki/scrapers/scraper.rb +2 -4
- data/lib/zorki/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e720f45ab99fab246cdb6431d001833cc33bdee49278947e425c9b88654ec454
|
4
|
+
data.tar.gz: e51242c9b1c64a88a5b00eb0450ed5ca602addd1ab36676fbe3ea1e8aa0878e2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ca1546e4c18d1d6d90723ac13d6e5c255dcd6d20761614a3a030be157aaf0d59ae886805ecdf17fb9718574a2c59a3ffedc14464f64221a060ddbd520cfe740f
|
7
|
+
data.tar.gz: cfcb54bcc569a005406ad3b52bb98e6990bb8089a80bf295c6f4a8331268e571b7c6f98afbc29c188b5de54ef2e2fded7d788cda42f618580989f01cb2fb9522
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
zorki (0.1.
|
4
|
+
zorki (0.1.5)
|
5
5
|
apparition
|
6
6
|
capybara
|
7
7
|
oj
|
@@ -12,30 +12,39 @@ PATH
|
|
12
12
|
GEM
|
13
13
|
remote: https://rubygems.org/
|
14
14
|
specs:
|
15
|
-
actionpack (7.0.
|
16
|
-
actionview (= 7.0.
|
17
|
-
activesupport (= 7.0.
|
18
|
-
|
15
|
+
actionpack (7.1.0.beta1)
|
16
|
+
actionview (= 7.1.0.beta1)
|
17
|
+
activesupport (= 7.1.0.beta1)
|
18
|
+
nokogiri (>= 1.8.5)
|
19
|
+
rack (>= 2.2.4)
|
20
|
+
rack-session (>= 1.0.1)
|
19
21
|
rack-test (>= 0.6.3)
|
20
|
-
rails-dom-testing (~> 2.
|
21
|
-
rails-html-sanitizer (~> 1.
|
22
|
-
actionview (7.0.
|
23
|
-
activesupport (= 7.0.
|
22
|
+
rails-dom-testing (~> 2.2)
|
23
|
+
rails-html-sanitizer (~> 1.6)
|
24
|
+
actionview (7.1.0.beta1)
|
25
|
+
activesupport (= 7.1.0.beta1)
|
24
26
|
builder (~> 3.1)
|
25
|
-
erubi (~> 1.
|
26
|
-
rails-dom-testing (~> 2.
|
27
|
-
rails-html-sanitizer (~> 1.
|
28
|
-
activesupport (7.0.
|
27
|
+
erubi (~> 1.11)
|
28
|
+
rails-dom-testing (~> 2.2)
|
29
|
+
rails-html-sanitizer (~> 1.6)
|
30
|
+
activesupport (7.1.0.beta1)
|
31
|
+
base64
|
32
|
+
bigdecimal
|
29
33
|
concurrent-ruby (~> 1.0, >= 1.0.2)
|
34
|
+
connection_pool (>= 2.2.5)
|
35
|
+
drb
|
30
36
|
i18n (>= 1.6, < 2)
|
31
37
|
minitest (>= 5.1)
|
38
|
+
mutex_m
|
32
39
|
tzinfo (~> 2.0)
|
33
|
-
addressable (2.8.
|
40
|
+
addressable (2.8.5)
|
34
41
|
public_suffix (>= 2.0.2, < 6.0)
|
35
42
|
apparition (0.6.0)
|
36
43
|
capybara (~> 3.13, < 4)
|
37
44
|
websocket-driver (>= 0.6.5)
|
38
45
|
ast (2.4.2)
|
46
|
+
base64 (0.1.1)
|
47
|
+
bigdecimal (3.1.4)
|
39
48
|
builder (3.2.4)
|
40
49
|
capybara (3.39.2)
|
41
50
|
addressable
|
@@ -47,78 +56,101 @@ GEM
|
|
47
56
|
regexp_parser (>= 1.5, < 3.0)
|
48
57
|
xpath (~> 3.2)
|
49
58
|
concurrent-ruby (1.2.2)
|
59
|
+
connection_pool (2.4.1)
|
50
60
|
crass (1.0.6)
|
51
61
|
curb (1.0.5)
|
52
62
|
dotenv (2.7.6)
|
63
|
+
drb (2.1.1)
|
64
|
+
ruby2_keywords
|
53
65
|
erubi (1.12.0)
|
54
66
|
ethon (0.16.0)
|
55
67
|
ffi (>= 1.15.0)
|
56
68
|
ffi (1.15.5)
|
57
69
|
i18n (1.14.1)
|
58
70
|
concurrent-ruby (~> 1.0)
|
71
|
+
io-console (0.6.0)
|
72
|
+
irb (1.8.1)
|
73
|
+
rdoc
|
74
|
+
reline (>= 0.3.8)
|
59
75
|
json (2.6.3)
|
76
|
+
language_server-protocol (3.17.0.3)
|
60
77
|
loofah (2.21.3)
|
61
78
|
crass (~> 1.0.2)
|
62
79
|
nokogiri (>= 1.12.0)
|
63
80
|
matrix (0.4.2)
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
nokogiri (1.15.
|
81
|
+
mini_mime (1.1.5)
|
82
|
+
minitest (5.20.0)
|
83
|
+
mutex_m (0.1.2)
|
84
|
+
nokogiri (1.15.4-arm64-darwin)
|
68
85
|
racc (~> 1.4)
|
69
|
-
oj (3.
|
86
|
+
oj (3.16.1)
|
70
87
|
parallel (1.23.0)
|
71
88
|
parser (3.2.2.3)
|
72
89
|
ast (~> 2.4.1)
|
73
90
|
racc
|
74
|
-
|
75
|
-
|
76
|
-
|
91
|
+
psych (5.1.0)
|
92
|
+
stringio
|
93
|
+
public_suffix (5.0.3)
|
94
|
+
racc (1.7.1)
|
95
|
+
rack (3.0.8)
|
96
|
+
rack-session (2.0.0)
|
97
|
+
rack (>= 3.0.0)
|
77
98
|
rack-test (2.1.0)
|
78
99
|
rack (>= 1.3)
|
79
|
-
|
80
|
-
|
100
|
+
rackup (2.1.0)
|
101
|
+
rack (>= 3)
|
102
|
+
webrick (~> 1.8)
|
103
|
+
rails-dom-testing (2.2.0)
|
104
|
+
activesupport (>= 5.0.0)
|
105
|
+
minitest
|
81
106
|
nokogiri (>= 1.6)
|
82
107
|
rails-html-sanitizer (1.6.0)
|
83
108
|
loofah (~> 2.21)
|
84
109
|
nokogiri (~> 1.14)
|
85
|
-
railties (7.0.
|
86
|
-
actionpack (= 7.0.
|
87
|
-
activesupport (= 7.0.
|
88
|
-
|
110
|
+
railties (7.1.0.beta1)
|
111
|
+
actionpack (= 7.1.0.beta1)
|
112
|
+
activesupport (= 7.1.0.beta1)
|
113
|
+
irb
|
114
|
+
rackup (>= 1.0.0)
|
89
115
|
rake (>= 12.2)
|
90
|
-
thor (~> 1.0)
|
91
|
-
zeitwerk (~> 2.
|
116
|
+
thor (~> 1.0, >= 1.2.2)
|
117
|
+
zeitwerk (~> 2.6)
|
92
118
|
rainbow (3.1.1)
|
93
119
|
rake (13.0.6)
|
120
|
+
rdoc (6.5.0)
|
121
|
+
psych (>= 4.0.0)
|
94
122
|
regexp_parser (2.8.1)
|
95
|
-
|
96
|
-
|
123
|
+
reline (0.3.8)
|
124
|
+
io-console (~> 0.5)
|
125
|
+
rexml (3.2.6)
|
126
|
+
rubocop (1.56.3)
|
127
|
+
base64 (~> 0.1.1)
|
97
128
|
json (~> 2.3)
|
129
|
+
language_server-protocol (>= 3.17.0)
|
98
130
|
parallel (~> 1.10)
|
99
131
|
parser (>= 3.2.2.3)
|
100
132
|
rainbow (>= 2.2.2, < 4.0)
|
101
133
|
regexp_parser (>= 1.8, < 3.0)
|
102
134
|
rexml (>= 3.2.5, < 4.0)
|
103
|
-
rubocop-ast (>= 1.28.
|
135
|
+
rubocop-ast (>= 1.28.1, < 2.0)
|
104
136
|
ruby-progressbar (~> 1.7)
|
105
137
|
unicode-display_width (>= 2.4.0, < 3.0)
|
106
138
|
rubocop-ast (1.29.0)
|
107
139
|
parser (>= 3.2.1.0)
|
108
140
|
rubocop-md (1.2.0)
|
109
141
|
rubocop (>= 1.0)
|
110
|
-
rubocop-minitest (0.31.
|
142
|
+
rubocop-minitest (0.31.1)
|
111
143
|
rubocop (>= 1.39, < 2.0)
|
112
144
|
rubocop-packaging (0.5.2)
|
113
145
|
rubocop (>= 1.33, < 2.0)
|
114
|
-
rubocop-performance (1.
|
146
|
+
rubocop-performance (1.19.0)
|
115
147
|
rubocop (>= 1.7.0, < 2.0)
|
116
148
|
rubocop-ast (>= 0.4.0)
|
117
149
|
rubocop-rails (2.19.1)
|
118
150
|
activesupport (>= 4.2.0)
|
119
151
|
rack (>= 1.1)
|
120
152
|
rubocop (>= 1.33.0, < 2.0)
|
121
|
-
rubocop-rails_config (1.
|
153
|
+
rubocop-rails_config (1.14.1)
|
122
154
|
railties (>= 5.0)
|
123
155
|
rubocop (>= 1.48.0)
|
124
156
|
rubocop-ast (>= 1.26.0)
|
@@ -128,26 +160,29 @@ GEM
|
|
128
160
|
rubocop-performance (~> 1.11)
|
129
161
|
rubocop-rails (~> 2.0)
|
130
162
|
ruby-progressbar (1.13.0)
|
163
|
+
ruby2_keywords (0.0.5)
|
131
164
|
rubyzip (2.3.2)
|
132
|
-
selenium-devtools (0.
|
165
|
+
selenium-devtools (0.116.0)
|
133
166
|
selenium-webdriver (~> 4.2)
|
134
|
-
selenium-webdriver (4.
|
167
|
+
selenium-webdriver (4.12.0)
|
135
168
|
rexml (~> 3.2, >= 3.2.5)
|
136
169
|
rubyzip (>= 1.2.2, < 3.0)
|
137
170
|
websocket (~> 1.0)
|
171
|
+
stringio (3.0.8)
|
138
172
|
thor (1.2.2)
|
139
173
|
typhoeus (1.4.0)
|
140
174
|
ethon (>= 0.9.0)
|
141
175
|
tzinfo (2.0.6)
|
142
176
|
concurrent-ruby (~> 1.0)
|
143
177
|
unicode-display_width (2.4.2)
|
178
|
+
webrick (1.8.1)
|
144
179
|
websocket (1.2.9)
|
145
|
-
websocket-driver (0.7.
|
180
|
+
websocket-driver (0.7.6)
|
146
181
|
websocket-extensions (>= 0.1.0)
|
147
182
|
websocket-extensions (0.1.5)
|
148
183
|
xpath (3.2.0)
|
149
184
|
nokogiri (~> 1.8)
|
150
|
-
zeitwerk (2.6.
|
185
|
+
zeitwerk (2.6.11)
|
151
186
|
|
152
187
|
PLATFORMS
|
153
188
|
arm64-darwin-22
|
@@ -156,6 +191,7 @@ DEPENDENCIES
|
|
156
191
|
curb (~> 1.0, >= 1.0.5)
|
157
192
|
dotenv (~> 2.7.6)
|
158
193
|
minitest (~> 5.0)
|
194
|
+
rack (= 3.0.8)
|
159
195
|
rake (~> 13.0)
|
160
196
|
rubocop (~> 1.7)
|
161
197
|
rubocop-rails (~> 2.19.1)
|
@@ -68,14 +68,12 @@ module Zorki
|
|
68
68
|
page.driver.browser.intercept do |request, &continue|
|
69
69
|
# This passes the request forward unmodified, since we only care about the response
|
70
70
|
# puts "checking request: #{request.url}"
|
71
|
-
|
72
71
|
continue.call(request) && next unless request.url.include?(subpage_search)
|
73
72
|
|
74
73
|
continue.call(request) do |response|
|
75
74
|
# Check if not a CORS prefetch and finish up if not
|
76
|
-
if response.body.
|
75
|
+
if !response.body.empty? && response.body
|
77
76
|
check_passed = true
|
78
|
-
|
79
77
|
unless additional_search_parameters.nil?
|
80
78
|
body_to_check = Oj.load(response.body)
|
81
79
|
|
@@ -117,7 +115,7 @@ module Zorki
|
|
117
115
|
# end
|
118
116
|
|
119
117
|
elements = doc.search("script").map do |element|
|
120
|
-
|
118
|
+
element_json = nil
|
121
119
|
begin
|
122
120
|
element_json = JSON.parse(element)
|
123
121
|
|
data/lib/zorki/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zorki
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Christopher Guess
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-09-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: capybara
|
@@ -145,7 +145,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
145
145
|
- !ruby/object:Gem::Version
|
146
146
|
version: '0'
|
147
147
|
requirements: []
|
148
|
-
rubygems_version: 3.4.
|
148
|
+
rubygems_version: 3.4.10
|
149
149
|
signing_key:
|
150
150
|
specification_version: 4
|
151
151
|
summary: A gem to scrape Instagram pages for archive purposes.
|