zorki 0.1.24 → 0.1.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fa95bb5ca131ca6b7faed2aab200329579b77012cb079da75a0ad90a60daa5bd
4
- data.tar.gz: 82eb0cc29af2cfaeafa8774e027904959f953a50faa03d71dbcbeba9595ac520
3
+ metadata.gz: ec1dba1c80d66f33d1e50fc104511a5842091529c6d2b1e3f4758508ae37468a
4
+ data.tar.gz: b510d48cbc12d0e9b70fb17fb91aeffdad51d85f85e5813695df1c86279894db
5
5
  SHA512:
6
- metadata.gz: 127638ce83ed09be71f194ba3e5dc269374890a1bd89874c8f158896a7664d41724698a258916f63e373b1c6f3dd3b4d315fac49a15e17ff457035ec4d345120
7
- data.tar.gz: 105b7e148774a82640ed48bffe46b21d941ce0a18987f59ef0e79a3a058077fbd9dda651811bbc421ff9fcc8323de152b2ac5afbff0e5b9d4e5c65d27f5ac85c
6
+ metadata.gz: 6463fc59b818e21d4c515212d04549b7a3b925433d7e9df79ff376a786f74e028ab99146b8422c64384470443d88835854717e3a19a7c68bb3a767e7277f1c39
7
+ data.tar.gz: 173b570bb5eb62d759a488c21aff0092c633db1f0f1a4fa771e46ad32141a2016386f698e724cbc1a7dced5ad66a9d7b22add8f7dd533d8dc88ec009ca9a2814
data/Gemfile.lock CHANGED
@@ -1,35 +1,36 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- zorki (0.1.9)
4
+ zorki (0.1.25)
5
5
  apparition
6
6
  capybara
7
7
  oj
8
8
  selenium-devtools
9
- selenium-webdriver (~> 4.19)
9
+ selenium-webdriver (~> 4.24.0)
10
10
  typhoeus
11
11
 
12
12
  GEM
13
13
  remote: https://rubygems.org/
14
14
  specs:
15
- activesupport (7.1.3.2)
15
+ activesupport (7.2.1)
16
16
  base64
17
17
  bigdecimal
18
- concurrent-ruby (~> 1.0, >= 1.0.2)
18
+ concurrent-ruby (~> 1.0, >= 1.3.1)
19
19
  connection_pool (>= 2.2.5)
20
20
  drb
21
21
  i18n (>= 1.6, < 2)
22
+ logger (>= 1.4.2)
22
23
  minitest (>= 5.1)
23
- mutex_m
24
- tzinfo (~> 2.0)
25
- addressable (2.8.6)
26
- public_suffix (>= 2.0.2, < 6.0)
24
+ securerandom (>= 0.3)
25
+ tzinfo (~> 2.0, >= 2.0.5)
26
+ addressable (2.8.7)
27
+ public_suffix (>= 2.0.2, < 7.0)
27
28
  apparition (0.6.0)
28
29
  capybara (~> 3.13, < 4)
29
30
  websocket-driver (>= 0.6.5)
30
31
  ast (2.4.2)
31
32
  base64 (0.2.0)
32
- bigdecimal (3.1.7)
33
+ bigdecimal (3.1.8)
33
34
  capybara (3.40.0)
34
35
  addressable
35
36
  matrix
@@ -39,75 +40,76 @@ GEM
39
40
  rack-test (>= 0.6.3)
40
41
  regexp_parser (>= 1.5, < 3.0)
41
42
  xpath (~> 3.2)
42
- concurrent-ruby (1.2.3)
43
+ concurrent-ruby (1.3.4)
43
44
  connection_pool (2.4.1)
44
- curb (1.0.5)
45
- debug (1.9.1)
45
+ curb (1.0.6)
46
+ debug (1.9.2)
46
47
  irb (~> 1.10)
47
48
  reline (>= 0.3.8)
48
49
  dotenv (2.7.6)
49
50
  drb (2.2.1)
50
51
  ethon (0.16.0)
51
52
  ffi (>= 1.15.0)
52
- ffi (1.16.3)
53
- i18n (1.14.4)
53
+ ffi (1.17.0-arm64-darwin)
54
+ i18n (1.14.5)
54
55
  concurrent-ruby (~> 1.0)
55
56
  io-console (0.7.2)
56
- irb (1.12.0)
57
- rdoc
57
+ irb (1.14.0)
58
+ rdoc (>= 4.0.0)
58
59
  reline (>= 0.4.2)
59
- json (2.7.1)
60
+ json (2.7.2)
60
61
  language_server-protocol (3.17.0.3)
62
+ logger (1.6.1)
61
63
  matrix (0.4.2)
62
64
  mini_mime (1.1.5)
63
- minitest (5.22.3)
64
- mutex_m (0.2.0)
65
- nokogiri (1.16.3-arm64-darwin)
65
+ minitest (5.25.1)
66
+ nokogiri (1.16.7-arm64-darwin)
66
67
  racc (~> 1.4)
67
- oj (3.16.3)
68
+ oj (3.16.6)
68
69
  bigdecimal (>= 3.0)
69
- parallel (1.24.0)
70
- parser (3.3.0.5)
70
+ ostruct (>= 0.2)
71
+ ostruct (0.6.0)
72
+ parallel (1.26.3)
73
+ parser (3.3.5.0)
71
74
  ast (~> 2.4.1)
72
75
  racc
73
76
  psych (5.1.2)
74
77
  stringio
75
- public_suffix (5.0.4)
76
- racc (1.7.3)
78
+ public_suffix (6.0.1)
79
+ racc (1.8.1)
77
80
  rack (3.0.8)
78
81
  rack-test (2.1.0)
79
82
  rack (>= 1.3)
80
83
  rainbow (3.1.1)
81
- rake (13.1.0)
82
- rdoc (6.6.3.1)
84
+ rake (13.2.1)
85
+ rdoc (6.7.0)
83
86
  psych (>= 4.0.0)
84
- regexp_parser (2.9.0)
85
- reline (0.5.0)
87
+ regexp_parser (2.9.2)
88
+ reline (0.5.10)
86
89
  io-console (~> 0.5)
87
- rexml (3.2.6)
88
- rubocop (1.62.1)
90
+ rexml (3.3.7)
91
+ rubocop (1.66.1)
89
92
  json (~> 2.3)
90
93
  language_server-protocol (>= 3.17.0)
91
94
  parallel (~> 1.10)
92
95
  parser (>= 3.3.0.2)
93
96
  rainbow (>= 2.2.2, < 4.0)
94
- regexp_parser (>= 1.8, < 3.0)
95
- rexml (>= 3.2.5, < 4.0)
96
- rubocop-ast (>= 1.31.1, < 2.0)
97
+ regexp_parser (>= 2.4, < 3.0)
98
+ rubocop-ast (>= 1.32.2, < 2.0)
97
99
  ruby-progressbar (~> 1.7)
98
100
  unicode-display_width (>= 2.4.0, < 3.0)
99
- rubocop-ast (1.31.2)
100
- parser (>= 3.3.0.4)
101
- rubocop-md (1.2.2)
102
- rubocop (>= 1.0)
103
- rubocop-minitest (0.35.0)
101
+ rubocop-ast (1.32.3)
102
+ parser (>= 3.3.1.0)
103
+ rubocop-md (1.2.3)
104
+ rubocop (>= 1.45)
105
+ rubocop-minitest (0.36.0)
104
106
  rubocop (>= 1.61, < 2.0)
105
107
  rubocop-ast (>= 1.31.1, < 2.0)
106
108
  rubocop-packaging (0.5.2)
107
109
  rubocop (>= 1.33, < 2.0)
108
- rubocop-performance (1.20.2)
110
+ rubocop-performance (1.21.1)
109
111
  rubocop (>= 1.48.1, < 2.0)
110
- rubocop-ast (>= 1.30.0, < 2.0)
112
+ rubocop-ast (>= 1.31.1, < 2.0)
111
113
  rubocop-rails (2.19.1)
112
114
  activesupport (>= 4.2.0)
113
115
  rack (>= 1.1)
@@ -122,20 +124,22 @@ GEM
122
124
  rubocop-rails (~> 2.0)
123
125
  ruby-progressbar (1.13.0)
124
126
  rubyzip (2.3.2)
125
- selenium-devtools (0.123.0)
127
+ securerandom (0.3.1)
128
+ selenium-devtools (0.128.0)
126
129
  selenium-webdriver (~> 4.2)
127
- selenium-webdriver (4.19.0)
130
+ selenium-webdriver (4.24.0)
128
131
  base64 (~> 0.2)
132
+ logger (~> 1.4)
129
133
  rexml (~> 3.2, >= 3.2.5)
130
134
  rubyzip (>= 1.2.2, < 3.0)
131
135
  websocket (~> 1.0)
132
- stringio (3.1.0)
136
+ stringio (3.1.1)
133
137
  typhoeus (1.4.1)
134
138
  ethon (>= 0.9.0)
135
139
  tzinfo (2.0.6)
136
140
  concurrent-ruby (~> 1.0)
137
- unicode-display_width (2.5.0)
138
- websocket (1.2.10)
141
+ unicode-display_width (2.6.0)
142
+ websocket (1.2.11)
139
143
  websocket-driver (0.7.6)
140
144
  websocket-extensions (>= 0.1.0)
141
145
  websocket-extensions (0.1.5)
@@ -141,12 +141,14 @@ module Zorki
141
141
  end
142
142
 
143
143
  if elements&.empty?
144
+ # debugger
144
145
  raise ContentUnavailableError.new("Cannot find anything", additional_data: { page_source: page.driver.browser.page_source, elements: elements })
145
146
  end
146
147
 
147
148
  return elements
148
149
  end
149
150
 
151
+ # debugger if response_body.nil?
150
152
  raise ContentUnavailableError.new("Response body nil") if response_body.nil?
151
153
  Oj.load(response_body)
152
154
  ensure
@@ -16,13 +16,30 @@ module Zorki
16
16
  # - *description
17
17
  # - *links
18
18
  # - *Profile image
19
- login
20
19
 
21
- graphql_script = get_content_of_subpage_from_url("https://instagram.com/#{username}/", "graphql", "data,user,full_name")
22
- graphql_script = graphql_script.first if graphql_script.class == Array
20
+ graphql_script = nil
21
+ count = 0
22
+ loop do
23
+ raise Zorki::UserScrapingError.new("Zorki could not find user #{username}", additional_data: { username: username }) if count > 3
23
24
 
24
- if graphql_script.nil?
25
- graphql_script = get_content_of_subpage_from_url("https://instagram.com/#{username}/", "web_profile_info")
25
+ print "Scraping user #{username}... (attempt #{count + 1})\n"
26
+ begin
27
+ login
28
+
29
+ graphql_script = get_content_of_subpage_from_url("https://instagram.com/#{username}/", "graphql", "data,user,media_count")
30
+ graphql_script = graphql_script.first if graphql_script.class == Array
31
+
32
+ if graphql_script.nil?
33
+ graphql_script = get_content_of_subpage_from_url("https://instagram.com/#{username}/", "web_profile_info")
34
+ end
35
+ rescue Zorki::ContentUnavailableError => e
36
+ count += 1
37
+ page.driver.browser.navigate.to("https://www.instagram.com") # we want to go back to the main page so we start from scratch
38
+ sleep 10
39
+ next
40
+ end
41
+
42
+ break
26
43
  end
27
44
 
28
45
  if graphql_script.has_key?("author") && !graphql_script["author"].nil?
@@ -36,10 +53,14 @@ module Zorki
36
53
  ["https://schema.org/FilmAction", "http://schema.org/WriteAction"].include?(stat["interactionType"])
37
54
  end.first
38
55
 
56
+ # number_of_posts = graphql_script["data"]["user"]["media_count"] if number_of_posts.nil?
57
+
39
58
  number_of_followers = graphql_script["interactionStatistic"].select do |stat|
40
59
  stat["interactionType"] == "http://schema.org/FollowAction"
41
60
  end.first
42
61
 
62
+ # number_of_followers = graphql_script["data"]["user"]["follower_count"] if number_of_followers.nil?
63
+
43
64
  begin
44
65
  profile_image_url = user["image"]
45
66
  {
@@ -76,7 +97,8 @@ module Zorki
76
97
  profile_image_url: profile_image_url
77
98
  }
78
99
  end
79
- rescue Zorki::ContentUnavailableError
100
+ rescue Zorki::ContentUnavailableError => e
101
+ debugger
80
102
  raise Zorki::UserScrapingError.new("Zorki could not find user #{username}", additional_data: { username: username })
81
103
  end
82
104
  end
data/lib/zorki/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Zorki
4
- VERSION = "0.1.24"
4
+ VERSION = "0.1.26"
5
5
  end
data/lib/zorki.rb CHANGED
@@ -28,7 +28,7 @@ module Zorki
28
28
  end
29
29
 
30
30
  def to_honeybadger_context
31
- additional_data
31
+ @additional_data
32
32
  end
33
33
  end
34
34
 
@@ -39,7 +39,7 @@ module Zorki
39
39
  end
40
40
 
41
41
  def to_honeybadger_context
42
- additional_data
42
+ @additional_data
43
43
  end
44
44
  end
45
45
 
data/zorki.gemspec CHANGED
@@ -33,7 +33,7 @@ Gem::Specification.new do |spec|
33
33
  spec.add_dependency "apparition" # A Chrome driver for Capybara
34
34
  spec.add_dependency "typhoeus" # For making API requests
35
35
  spec.add_dependency "oj" # A faster JSON parser/loader than stdlib
36
- spec.add_dependency "selenium-webdriver", "~> 4.19" # Webdriver selenium
36
+ spec.add_dependency "selenium-webdriver", "~> 4.24.0" # Webdriver selenium
37
37
  spec.add_dependency "selenium-devtools" # Allow us to intercept requests
38
38
 
39
39
  spec.add_development_dependency "debug"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zorki
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.24
4
+ version: 0.1.26
5
5
  platform: ruby
6
6
  authors:
7
7
  - Christopher Guess
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-06-19 00:00:00.000000000 Z
11
+ date: 2024-09-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: capybara
@@ -72,14 +72,14 @@ dependencies:
72
72
  requirements:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: '4.19'
75
+ version: 4.24.0
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: '4.19'
82
+ version: 4.24.0
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: selenium-devtools
85
85
  requirement: !ruby/object:Gem::Requirement