zorki 0.1.2 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e866c6873fc604f1720faeaf9425ff0557a6ef9947e9a9e4f657cc255a8d4665
4
- data.tar.gz: e97fa8e84dd02edc9dd47beb9d66fe2b3cab4704297f16005dc09adf68eeb2e4
3
+ metadata.gz: 6dd3c28132011c0d9d42875803face311af01a50bbbfa9cf4f07ca89a63029d4
4
+ data.tar.gz: 91fa19abbd41551e4cb55ff34ce3977795903312c06506bf9bb450796cca7189
5
5
  SHA512:
6
- metadata.gz: f5d6f36fede35c5d54289f6f7a41098e5374f74e8bbbf18ebd66486e4c049fdd064e063a7787000ed5997a9460496d9c5310795585a9098b1bfcbf891c7277e6
7
- data.tar.gz: 8a79812eb8d312fef165868bbaad85cc4a7cd0f67ed04fb3e7cb0246e417ec5eefb89a54d3eec7ff78b3a3b38a9df89ae8c82423ea7fbb4ab04d160c86b397f5
6
+ metadata.gz: '087a14d77466f9b5f70014f5f09ae97542d2b8083499917d0397815dd4c298da07da9c4b28fb31e6f7c1949a356e9c4d32b8bb8e7551054ca1fe5c37c972515b'
7
+ data.tar.gz: 5b46e4e7edb229d89ff5f9bb740dac2959c314e09cf5b9a770c8b16779509266a8bd06d38b1a1208f547cfef22e10fd139b0f32dbea9c89dba5413874399743a
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- zorki (0.1.1)
4
+ zorki (0.1.2)
5
5
  apparition
6
6
  capybara
7
7
  oj
@@ -12,20 +12,20 @@ PATH
12
12
  GEM
13
13
  remote: https://rubygems.org/
14
14
  specs:
15
- actionpack (7.0.4.3)
16
- actionview (= 7.0.4.3)
17
- activesupport (= 7.0.4.3)
18
- rack (~> 2.0, >= 2.2.0)
15
+ actionpack (7.0.5)
16
+ actionview (= 7.0.5)
17
+ activesupport (= 7.0.5)
18
+ rack (~> 2.0, >= 2.2.4)
19
19
  rack-test (>= 0.6.3)
20
20
  rails-dom-testing (~> 2.0)
21
21
  rails-html-sanitizer (~> 1.0, >= 1.2.0)
22
- actionview (7.0.4.3)
23
- activesupport (= 7.0.4.3)
22
+ actionview (7.0.5)
23
+ activesupport (= 7.0.5)
24
24
  builder (~> 3.1)
25
25
  erubi (~> 1.4)
26
26
  rails-dom-testing (~> 2.0)
27
27
  rails-html-sanitizer (~> 1.1, >= 1.2.0)
28
- activesupport (7.0.4.3)
28
+ activesupport (7.0.5)
29
29
  concurrent-ruby (~> 1.0, >= 1.0.2)
30
30
  i18n (>= 1.6, < 2)
31
31
  minitest (>= 5.1)
@@ -37,7 +37,7 @@ GEM
37
37
  websocket-driver (>= 0.6.5)
38
38
  ast (2.4.2)
39
39
  builder (3.2.4)
40
- capybara (3.39.1)
40
+ capybara (3.39.2)
41
41
  addressable
42
42
  matrix
43
43
  mini_mime (>= 0.1.3)
@@ -54,54 +54,56 @@ GEM
54
54
  ethon (0.16.0)
55
55
  ffi (>= 1.15.0)
56
56
  ffi (1.15.5)
57
- i18n (1.13.0)
57
+ i18n (1.14.1)
58
58
  concurrent-ruby (~> 1.0)
59
59
  json (2.6.3)
60
- loofah (2.21.2)
60
+ loofah (2.21.3)
61
61
  crass (~> 1.0.2)
62
62
  nokogiri (>= 1.12.0)
63
63
  matrix (0.4.2)
64
64
  method_source (1.0.0)
65
65
  mini_mime (1.1.2)
66
66
  minitest (5.18.0)
67
- nokogiri (1.14.4-arm64-darwin)
67
+ nokogiri (1.15.2-arm64-darwin)
68
68
  racc (~> 1.4)
69
- oj (3.14.3)
69
+ oj (3.15.0)
70
70
  parallel (1.23.0)
71
- parser (3.2.2.1)
71
+ parser (3.2.2.3)
72
72
  ast (~> 2.4.1)
73
+ racc
73
74
  public_suffix (5.0.1)
74
- racc (1.6.2)
75
+ racc (1.7.0)
75
76
  rack (2.2.7)
76
77
  rack-test (2.1.0)
77
78
  rack (>= 1.3)
78
79
  rails-dom-testing (2.0.3)
79
80
  activesupport (>= 4.2.0)
80
81
  nokogiri (>= 1.6)
81
- rails-html-sanitizer (1.5.0)
82
- loofah (~> 2.19, >= 2.19.1)
83
- railties (7.0.4.3)
84
- actionpack (= 7.0.4.3)
85
- activesupport (= 7.0.4.3)
82
+ rails-html-sanitizer (1.6.0)
83
+ loofah (~> 2.21)
84
+ nokogiri (~> 1.14)
85
+ railties (7.0.5)
86
+ actionpack (= 7.0.5)
87
+ activesupport (= 7.0.5)
86
88
  method_source
87
89
  rake (>= 12.2)
88
90
  thor (~> 1.0)
89
91
  zeitwerk (~> 2.5)
90
92
  rainbow (3.1.1)
91
93
  rake (13.0.6)
92
- regexp_parser (2.8.0)
94
+ regexp_parser (2.8.1)
93
95
  rexml (3.2.5)
94
- rubocop (1.51.0)
96
+ rubocop (1.52.1)
95
97
  json (~> 2.3)
96
98
  parallel (~> 1.10)
97
- parser (>= 3.2.0.0)
99
+ parser (>= 3.2.2.3)
98
100
  rainbow (>= 2.2.2, < 4.0)
99
101
  regexp_parser (>= 1.8, < 3.0)
100
102
  rexml (>= 3.2.5, < 4.0)
101
103
  rubocop-ast (>= 1.28.0, < 2.0)
102
104
  ruby-progressbar (~> 1.7)
103
105
  unicode-display_width (>= 2.4.0, < 3.0)
104
- rubocop-ast (1.28.1)
106
+ rubocop-ast (1.29.0)
105
107
  parser (>= 3.2.1.0)
106
108
  rubocop-md (1.2.0)
107
109
  rubocop (>= 1.0)
@@ -109,7 +111,7 @@ GEM
109
111
  rubocop (>= 1.39, < 2.0)
110
112
  rubocop-packaging (0.5.2)
111
113
  rubocop (>= 1.33, < 2.0)
112
- rubocop-performance (1.17.1)
114
+ rubocop-performance (1.18.0)
113
115
  rubocop (>= 1.7.0, < 2.0)
114
116
  rubocop-ast (>= 0.4.0)
115
117
  rubocop-rails (2.19.1)
@@ -127,9 +129,9 @@ GEM
127
129
  rubocop-rails (~> 2.0)
128
130
  ruby-progressbar (1.13.0)
129
131
  rubyzip (2.3.2)
130
- selenium-devtools (0.113.0)
132
+ selenium-devtools (0.114.0)
131
133
  selenium-webdriver (~> 4.2)
132
- selenium-webdriver (4.9.1)
134
+ selenium-webdriver (4.10.0)
133
135
  rexml (~> 3.2, >= 3.2.5)
134
136
  rubyzip (>= 1.2.2, < 3.0)
135
137
  websocket (~> 1.0)
@@ -23,6 +23,8 @@ module Zorki
23
23
  "data,xdt_api__v1__media__shortcode__web_info,items"
24
24
  )
25
25
 
26
+ graphql_object = graphql_object.first if graphql_object.kind_of?(Array)
27
+
26
28
  # For pages that have been marked misinfo the structure is very different than not
27
29
  # If it is a clean post then it's just a schema.org thing, but if it's misinfo it's the old
28
30
  # way of deeply nested stuff.
@@ -54,7 +56,8 @@ module Zorki
54
56
  else
55
57
  # We need to see if this is a single image post or a slideshow. We do that
56
58
  # by looking for a single image, if it's not there, we assume the alternative.
57
- graphql_object = graphql_object["data"]["xdt_api__v1__media__shortcode__web_info"]
59
+ # debugger
60
+ # graphql_object = graphql_object["data"]["xdt_api__v1__media__shortcode__web_info"]
58
61
 
59
62
  unless graphql_object["items"][0].has_key?("video_versions") && !graphql_object["items"][0]["video_versions"].nil?
60
63
  # Check if there is a slideshow or not
@@ -5,9 +5,9 @@ require "dotenv/load"
5
5
  require "oj"
6
6
  require "selenium-webdriver"
7
7
  require "logger"
8
- require "debug"
9
8
  require "securerandom"
10
9
  require "selenium/webdriver/remote/http/curb"
10
+ require "debug"
11
11
 
12
12
  # 2022-06-07 14:15:23 WARN Selenium [DEPRECATION] [:browser_options] :options as a parameter for driver initialization is deprecated. Use :capabilities with an Array of value capabilities/options if necessary instead.
13
13
 
@@ -112,12 +112,27 @@ module Zorki
112
112
  # TODO: put this before the whole load loop
113
113
  if response_body.nil?
114
114
  doc = Nokogiri::HTML(page.driver.browser.page_source)
115
- elements = doc.search("script").find_all do |e|
116
- e.attributes.has_key?("type") && e.attributes["type"].value == "application/ld+json"
117
- end
115
+ # elements = doc.search("script").find_all do |e|
116
+ # e.attributes.has_key?("type") && e.attributes["type"].value == "application/ld+json"
117
+ # end
118
+
119
+ elements = doc.search("script").map do |element|
120
+ element_json = nil
121
+ begin
122
+ element_json = JSON.parse(element)
123
+
124
+ element_json = element_json["require"].first.last.first["__bbox"]["require"].first.last.last["__bbox"]["result"]["data"]["xdt_api__v1__media__shortcode__web_info"]
125
+ rescue StandardError => e
126
+ next
127
+ end
128
+
129
+ element_json
130
+ end.compact
118
131
 
119
- raise ContentUnavailableError if elements&.empty?
120
- return Oj.load(elements.first.text)
132
+ if elements&.empty?
133
+ raise ContentUnavailableError
134
+ end
135
+ return elements
121
136
  end
122
137
 
123
138
  raise ContentUnavailableError if response_body.nil?
@@ -19,6 +19,7 @@ module Zorki
19
19
  login
20
20
 
21
21
  graphql_script = get_content_of_subpage_from_url("https://instagram.com/#{username}/", "?username=")
22
+ graphql_script = graphql_script.first if graphql_script.class == Array
22
23
 
23
24
  if graphql_script.has_key?("author") && !graphql_script["author"].nil?
24
25
  user = graphql_script["author"]
@@ -28,26 +29,28 @@ module Zorki
28
29
  raise Zorki::Error unless username == scraped_username
29
30
 
30
31
  number_of_posts = graphql_script["interactionStatistic"].select do |stat|
31
- stat["interactionType"] == "https://schema.org/FilmAction"
32
+ ["https://schema.org/FilmAction", "http://schema.org/WriteAction"].include?(stat["interactionType"])
32
33
  end.first
33
34
 
34
35
  number_of_followers = graphql_script["interactionStatistic"].select do |stat|
35
36
  stat["interactionType"] == "http://schema.org/FollowAction"
36
37
  end.first
37
38
 
38
- profile_image_url = user["image"]
39
- {
40
- name: user["name"],
41
- username: username,
42
- number_of_posts: Integer(number_of_posts["userInteractionCount"]),
43
- number_of_followers: Integer(number_of_followers["userInteractionCount"]),
44
- # number_of_following: user["edge_follow"]["count"],
45
- verified: user["is_verified"], # todo
46
- profile: graphql_script["description"],
47
- profile_link: user["sameAs"],
48
- profile_image: Zorki.retrieve_media(profile_image_url),
49
- profile_image_url: profile_image_url
50
- }
39
+ begin
40
+ profile_image_url = user["image"]
41
+ {
42
+ name: user["name"],
43
+ username: username,
44
+ number_of_posts: Integer(number_of_posts["userInteractionCount"]),
45
+ number_of_followers: Integer(number_of_followers["userInteractionCount"]),
46
+ # number_of_following: user["edge_follow"]["count"],
47
+ verified: user["is_verified"], # todo
48
+ profile: graphql_script["description"],
49
+ profile_link: user["sameAs"],
50
+ profile_image: Zorki.retrieve_media(profile_image_url),
51
+ profile_image_url: profile_image_url
52
+ }
53
+ end
51
54
  else
52
55
  user = graphql_script["data"]["user"]
53
56
 
data/lib/zorki/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Zorki
4
- VERSION = "0.1.2"
4
+ VERSION = "0.1.4"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zorki
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Christopher Guess
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-05-27 00:00:00.000000000 Z
11
+ date: 2023-07-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: capybara