curlyq 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,108 @@
1
+ # frozen_string_literal: true
2
+
3
module Curl
  # Class for CURLing a JSON response
  class Json
    attr_reader :url, :code, :json, :headers

    # User agent strings tried, in order, when a request made without an
    # explicit agent produces no output.
    USER_AGENTS = [
      'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Safari/605.1.1',
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.',
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.3',
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.'
    ].freeze

    ##
    ## Convert the response to a plain Hash
    ##
    ## @return [Hash] hash with :url, :code, :json and :headers keys
    ##
    def to_data
      {
        url: @url,
        code: @code,
        json: @json,
        headers: @headers
      }
    end

    ##
    ## Create a new Curl::Json page object
    ##
    ## @param url [String] The url to curl
    ## @param headers [Hash] The headers to send
    ## @param compressed [Boolean] Expect compressed results
    ## @param symbolize_names [Boolean] Parse JSON object keys as symbols
    ##
    ## @return [Curl::Json] Curl::Json object with url, code, parsed json, and response headers
    ##
    ## @raise [RuntimeError] if no response could be retrieved
    ##
    def initialize(url, headers: nil, compressed: false, symbolize_names: false)
      @curl = TTY::Which.which('curl')
      page = curl_json(url, headers: headers, compressed: compressed, symbolize_names: symbolize_names)

      # curl_json returns false when every attempt came back empty, so test
      # truthiness before calling #empty?
      raise "Error retrieving #{url}" if !page || page.empty?

      @url = page[:url]
      @code = page[:code]
      @json = page[:json]
      @headers = page[:headers]
    end

    ##
    ## Extract a value from parsed JSON using a dot-separated path.
    ## A segment may carry a numeric index, e.g. "items[2].name".
    ##
    ## @param path [String] Dot-separated path
    ## @param json [Hash, Array] Data to query, defaults to the parsed response
    ##
    ## @return The value at the path, or nil if a segment is missing
    ##
    def path(path, json = @json)
      path.split('.').reduce(json) do |target, part|
        break nil if target.nil?

        if (m = part.match(/(?<key>[^\[]+)\[(?<int>\d+)\]/))
          list = fetch_key(target, m[:key])
          list.nil? ? nil : list[m[:int].to_i]
        else
          fetch_key(target, part)
        end
      end
    end

    private

    ##
    ## Look up a path segment in a container. Hashes are tried with the
    ## string key first, then with the symbol form so responses parsed with
    ## symbolize_names can be queried as well.
    ##
    ## @param target [Hash, Array] The container
    ## @param key [String] The key to look up
    ##
    ## @return The value stored under key, or nil
    ##
    def fetch_key(target, key)
      return target[key] unless target.is_a?(Hash)
      return target[key] if target.key?(key)

      target[key.to_sym]
    end

    ##
    ## Curl the JSON contents
    ##
    ## @param url [String] The url
    ## @param headers [Hash] The headers to send
    ## @param compressed [Boolean] Expect compressed results
    ## @param symbolize_names [Boolean] Parse JSON object keys as symbols
    ##
    ## @return [Hash, false] hash of url, code, headers, and parsed json
    ##                       (json is nil when the body does not parse), or
    ##                       false when no output could be retrieved
    ##
    def curl_json(url, headers: nil, compressed: false, symbolize_names: false)
      flags = 'SsLi'
      header_args = headers.nil? ? '' : headers.map { |h, v| %(-H "#{h}: #{v}") }.join(' ')
      compress = compressed ? '--compressed' : ''

      # NOTE(review): url and header values are interpolated into a shell
      # command line unescaped; only pass trusted values.
      source = `#{@curl} -#{flags} #{compress} #{header_args} '#{url}' 2>/dev/null`

      # Retry once per user agent, stopping as soon as output is received
      USER_AGENTS.each do |agent|
        break unless source.nil? || source.empty?

        source = `#{@curl} -#{flags} #{compress} -A "#{agent}" #{header_args} '#{url}' 2>/dev/null`
      end

      return false if source.nil? || source.empty?

      source = source.strip
      return false if source.empty?

      response_headers = {}
      lines = source.split(/\r\n/)
      # First line is the status line; take the first three-digit group
      status = lines.shift.to_s.match(/(\d\d\d)/)
      code = status && status[1]

      # Consume header lines; everything from the first non-header line on
      # is treated as the body
      body = source
      lines.each_with_index do |line, idx|
        if (m = line.match(/^([\w-]+): (.*?)$/))
          response_headers[m[1]] = m[2]
        else
          body = lines[idx..].join("\n")
          break
        end
      end

      json = body.strip.force_encoding('utf-8')
      begin
        json.gsub!(/[\u{1F600}-\u{1F6FF}]/, '')
        { url: url, code: code, headers: response_headers, json: JSON.parse(json, symbolize_names: symbolize_names) }
      rescue StandardError
        # Body was not valid JSON; report the response without parsed data
        { url: url, code: code, headers: response_headers, json: nil }
      end
    end
  end
end
data/lib/curly/curl.rb ADDED
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ # import
4
+ require_relative 'curl/html'
5
+
6
+ # import
7
+ require_relative 'curl/json'
data/lib/curly/hash.rb ADDED
@@ -0,0 +1,200 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Hash helpers
4
class ::Hash
  # Extract data using a dot-syntax path
  #
  # Each dot-separated segment is a key, optionally followed by an index
  # ("items[0]", "items[1..3]", "items[2,4]" for start,length) and/or
  # attribute comparisons ("items[price>5&name^=foo]"). Comparisons joined
  # with & or + must all pass; a comma starts an alternative group. The
  # result of each segment is what the next segment is applied to.
  #
  # @param path [String] The path
  #
  # @return Result of path query, false if a key is missing
  #
  def dot_query(path)
    res = stringify_keys
    out = []
    path.split(/(?<![\d.])\./).each do |segment|
      index = segment[/\[([0-9,.]+)\]/, 1]
      segment = segment.sub(/\[([0-9,.]+)\]/, '')
      groups, key = dot_query_filters(segment)

      return false if index.nil? && groups.empty? && !(res.is_a?(Hash) && res.key?(key))

      unless key.empty?
        return false unless res.is_a?(Hash)

        res = res[key]
      end

      if groups.empty?
        out = res
      else
        return false unless res.is_a?(Array)

        # Keep elements that satisfy at least one comparison group
        out = res.select { |r| r.is_a?(Hash) && groups.any? { |group| evaluate_comp(r, group) } }
      end

      out = dot_query_slice(out, index) if index && out.is_a?(Array)

      # Carry the result forward so the next segment queries it; hashes
      # found inside arrays have not been stringified yet
      res = out.is_a?(Hash) ? out.stringify_keys : out
    end
    out
  end

  ##
  ## Evaluate a group of comparisons against an element
  ##
  ## @param r [Hash] the element to test, keyed by symbol or string
  ## @param atr [Array<Array>] list of [key, operator, value] comparisons;
  ##                           every one must pass
  ##
  ## @return [Boolean] whether the comparison passes or fails
  ##
  def evaluate_comp(r, atr)
    atr.all? { |key, operator, expected| dot_query_compare(r, key, operator, expected) }
  end

  ##
  ## Test if a hash contains a tag matching filter queries
  ##
  ## @param tag_name [String] The tag name
  ## @param classes [Array<String>] The classes to match, all must be present
  ## @param id [String] The id attribute to
  ##                           match
  ## @param attribute [String] The attribute
  ## @param operator [String] The operator, <>= *=
  ##                           $= ^=
  ## @param value [String] The value to match
  ## @param descendant [Boolean] Check descendant tags
  ##
  ## @return [Boolean] whether a match was found
  ##
  def tag_match(tag_name, classes, id, attribute, operator, value, descendant: false)
    tag = self
    # A tag without attributes cannot satisfy an id/class/attribute filter
    attrs = tag['attrs'] || []

    keep = !(tag_name && tag['tag'].to_s !~ /^#{tag_name}$/i)

    if keep && id
      id_attr = attrs.find { |a| a['key'] == 'id' }
      tag_id = id_attr && id_attr['value']
      keep = !tag_id.nil? && tag_id.to_s.match?(/#{id}/i)
    end

    if keep && classes
      cls = attrs.find { |a| a['key'] == 'class' }
      cls_value = cls && cls['value']
      keep = !cls_value.nil? && classes.all? { |c| cls_value.include?(c) }
    end

    if keep && attribute
      keep = attrs.any? do |a|
        a['key'].to_s.match?(/^#{attribute}$/i) && dot_query_match?(a['value'], operator, value)
      end
    end

    return keep if descendant || !tag.key?('tags')

    # NOTE(review): when child tags are present the result depends only on
    # the children and ignores this tag's own match — kept as in the
    # original, confirm this is intended.
    Array(tag['tags']).any? { |t| t.tag_match(tag_name, classes, id, attribute, operator, value) }
  end

  # Turn all keys into string
  #
  # If the hash has both a string and a symbol for key,
  # keep the string value, discarding the symbol value
  #
  # @return [Hash] a copy of the hash where all its
  #                keys are strings
  #
  def stringify_keys
    each_with_object({}) do |(k, v), hsh|
      next if k.is_a?(Symbol) && key?(k.to_s)

      hsh[k.to_s] = v.is_a?(Hash) ? v.stringify_keys : v
    end
  end

  private

  # Split the attribute comparisons off a path segment.
  #
  # @param segment [String] path segment with any index already removed
  #
  # @return [Array] pair of comparison groups (each a list of
  #                 [key, operator, value]) and the remaining key
  #
  def dot_query_filters(segment)
    comparison = /\[(?<com>[,+&])? *(?<key>\w+) *(?<op>[\^*$=<>]{1,2}) *(?<val>\w+) */
    groups = []
    current = []

    while segment =~ /\[[+&,]?\w+ *[\^*$=<>]=? *\w+/
      m = segment.match(comparison)
      comp = [m['key'], m['op'], m['val']]
      if m['com'] == ','
        # A comma closes the current group and opens an alternative one
        groups.push(current) unless current.empty?
        current = [comp]
      else
        current.push(comp)
      end

      segment = segment.sub(comparison, '[')
    end

    groups.push(current) unless current.empty?
    [groups, segment.sub(/\[\]/, '')]
  end

  # Apply an index expression to a list without evaluating it as code.
  #
  # @param list [Array] the list to index
  # @param index [String] "N", "N,LEN", "A..B" or "A...B" (either end of a
  #                       range may be omitted)
  #
  # @return The selected element(s), nil for an unrecognized expression
  #
  def dot_query_slice(list, index)
    case index
    when /\A\d+\z/
      list[index.to_i]
    when /\A(\d+),(\d+)\z/
      m = Regexp.last_match
      list[m[1].to_i, m[2].to_i]
    when /\A(\d*)(\.{2,3})(\d*)\z/
      m = Regexp.last_match
      first = m[1].empty? ? 0 : m[1].to_i
      if m[3].empty?
        list[first..-1]
      elsif m[2] == '...'
        list[first...m[3].to_i]
      else
        list[first..m[3].to_i]
      end
    end
  end

  # Evaluate a single comparison against an element.
  #
  # @param element [Hash] the element, keyed by symbol or string
  # @param key [String] key to look up in the element
  # @param operator [String] comparison operator
  # @param expected [String] value to compare with
  #
  # @return [Boolean] whether the comparison passes
  #
  def dot_query_compare(element, key, operator, expected)
    lookup = [key.to_sym, key.to_s].find { |k| element.key?(k) }
    return false if lookup.nil?

    actual = element[lookup]
    text = expected.to_s
    number = if text.match?(/^\d+$/)
               text.to_i
             elsif text.match?(/^\d+\.\d+$/)
               text.to_f
             end

    if !actual.is_a?(Array) && number && operator.to_s.match?(/^[<>=]{1,2}$/)
      dot_query_numeric_compare(actual, operator, number)
    else
      dot_query_match?(actual, operator, expected)
    end
  end

  # Compare a value numerically using a whitelisted operator.
  #
  # @param actual [Object] the stored value; non-numeric values are
  #                        converted with to_i
  # @param operator [String] =, ==, <, >, <= or >=
  # @param number [Numeric] the number to compare with
  #
  # @return [Boolean] result of the comparison, false for other operators
  #
  def dot_query_numeric_compare(actual, operator, number)
    op = operator == '=' ? '==' : operator
    return false unless %w[== < > <= >=].include?(op)
    return false unless actual.is_a?(Numeric) || actual.respond_to?(:to_i)

    lhs = actual.is_a?(Numeric) ? actual : actual.to_i
    lhs.public_send(op, number)
  end

  # Case-insensitive text match selected by operator: ^ for starts with,
  # $ for ends with, * for contains, anything else for an exact match.
  # An Array matches when any of its items match.
  #
  # @param actual [Object] the stored value
  # @param operator [String] comparison operator
  # @param pattern [String] pattern to match, interpolated into a regex
  #
  # @return [Boolean] whether the value matches
  #
  def dot_query_match?(actual, operator, pattern)
    return actual.any? { |item| dot_query_match?(item, operator, pattern) } if actual.is_a?(Array)
    return false if actual.nil?

    text = actual.to_s
    case operator
    when /^\*/
      text.match?(/#{pattern}/i)
    when /^\^/
      text.match?(/^#{pattern}/i)
    when /^\$/
      text.match?(/#{pattern}$/i)
    else
      text.match?(/^#{pattern}$/i)
    end
  end
end
@@ -0,0 +1,91 @@
1
+ # frozen_string_literal: true
2
+
3
# String helpers
class ::String
  ##
  ## Remove extra spaces and newlines from a string
  ##
  ## @return [String] cleaned string
  ##
  def clean
    gsub(/[\n ]+/m, ' ').gsub(/> +</, '><')
  end

  ##
  ## Remove HTML tags from a string
  ##
  ## @return [String] stripped string
  ##
  def strip_tags
    gsub(%r{</?.*?>}, '')
  end

  ##
  ## Destructive version of #clean
  ##
  ## @see #clean
  ##
  def clean!
    replace clean
  end

  ##
  ## Destructive version of #strip_tags
  ##
  ## @see #strip_tags
  ##
  def strip_tags!
    replace strip_tags
  end

  ##
  ## Convert an image type string to a symbol
  ##
  ## @param default [Symbol, String] value used when the string is not
  ##                                 recognized; a String is normalized too
  ##
  ## @return Symbol :srcset, :img, :opengraph, :all
  ##
  def normalize_image_type(default = :all)
    case to_s
    when /^[sp]/i
      :srcset
    when /^i/i
      :img
    when /^o/i
      :opengraph
    else
      default.is_a?(Symbol) ? default : default.normalize_image_type
    end
  end

  ##
  ## Convert a browser type string to a symbol
  ##
  ## @param default [Symbol, String] value used when the string is not
  ##                                 recognized; a String is normalized too
  ##
  ## @return Symbol :chrome, :firefox
  ##
  def normalize_browser_type(default = :none)
    case to_s
    when /^c/i
      :chrome
    when /^f/i
      :firefox
    else
      default.is_a?(Symbol) ? default : default.normalize_browser_type
    end
  end

  ##
  ## Convert a screenshot type string to a symbol
  ##
  ## @param default [Symbol, String] value used when the string is not
  ##                                 recognized; a String is normalized too
  ##
  ## @return Symbol :full_page, :print_page, :visible
  ##
  def normalize_screenshot_type(default = :none)
    case to_s
    when /^f/i
      :full_page
    when /^p/i
      :print_page
    when /^v/i
      :visible
    else
      # A String default is a screenshot type, so normalize it as one
      default.is_a?(Symbol) ? default : default.normalize_screenshot_type
    end
  end
end
@@ -0,0 +1,3 @@
1
# frozen_string_literal: true

# Top-level namespace holding the gem's version constant
module Curly
  # Version string of the gem
  VERSION = '0.0.2'
end
data/lib/curly.rb ADDED
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'curly/version'
4
+ require 'curly/hash'
5
+ require 'curly/string'
6
+ require 'curly/array'
7
+ require 'json'
8
+ require 'yaml'
9
+ require 'uri'
10
+ require 'tty-which'
11
+ require 'nokogiri'
12
+ require 'selenium-webdriver'
data/src/_README.md ADDED
@@ -0,0 +1,101 @@
1
+ <!--README--><!--GITHUB--># curlyq
2
+
3
+ [![Gem](https://img.shields.io/gem/v/curlyq.svg)](https://rubygems.org/gems/curlyq)
4
+ [![GitHub license](https://img.shields.io/github/license/ttscoff/curlyq.svg)](./LICENSE.txt)
5
+
6
+ **A command line helper for curl and web scraping**
7
+
8
+ _If you find this useful, feel free to [buy me some coffee][donate]._
9
+ <!--END GITHUB-->
10
+
11
+ The current version of `curlyq` is <!--VER--><!--END VER-->.
12
+
13
+ `curlyq` is a command that provides a simple interface for curl, with additional features for things like extracting images and links, finding elements by CSS selector or XPath, getting detailed header info, and more. It also has rudimentary support for making calls to JSON endpoints easier, but it's expected that you'll use something like `jq` to parse the output.
14
+
15
+ [github]: https://github.com/ttscoff/curlyq/
16
+
17
+ ### Installation
18
+
19
+ Assuming you have Ruby and RubyGems installed, you can just run `gem install curlyq`. If you run into errors, try `gem install --user-install curlyq`, or use `sudo gem install curlyq`.
20
+
21
+ If you're using Homebrew, you have the option to install via [brew-gem](https://github.com/sportngin/brew-gem):
22
+
23
+ brew install brew-gem
24
+ brew gem install curlyq
25
+
26
+ If you don't have Ruby/RubyGems, you can install them pretty easily with Homebrew, rvm, or asdf.
27
+
28
+ ### Usage
29
+
30
+ Run `curlyq help` for a list of commands. Run `curlyq help SUBCOMMAND` for details on a particular subcommand and its options.
31
+
32
+ ```
33
+ @cli(bundle exec bin/curlyq help)
34
+ ```
35
+
36
+ #### Commands
37
+
38
+ curlyq makes use of subcommands, e.g. `curlyq html` or `curlyq extract`. Each subcommand takes its own options, but I've made an effort to standardize the choices between each command.
39
+
40
+ ##### extract
41
+
42
+ ```
43
+ @cli(bundle exec bin/curlyq help extract)
44
+ ```
45
+
46
+
47
+ ##### headlinks
48
+
49
+ ```
50
+ @cli(bundle exec bin/curlyq help headlinks)
51
+ ```
52
+
53
+ ##### html
54
+
55
+ ```
56
+ @cli(bundle exec bin/curlyq help html)
57
+ ```
58
+
59
+ ##### images
60
+
61
+ ```
62
+ @cli(bundle exec bin/curlyq help images)
63
+ ```
64
+
65
+ ##### json
66
+
67
+ ```
68
+ @cli(bundle exec bin/curlyq help json)
69
+ ```
70
+
71
+ ##### links
72
+
73
+ ```
74
+ @cli(bundle exec bin/curlyq help links)
75
+ ```
76
+
77
+ ##### scrape
78
+
79
+ ```
80
+ @cli(bundle exec bin/curlyq help scrape)
81
+ ```
82
+
83
+ ##### screenshot
84
+
85
+ ```
86
+ @cli(bundle exec bin/curlyq help screenshot)
87
+ ```
88
+
89
+ ##### tags
90
+
91
+ ```
92
+ @cli(bundle exec bin/curlyq help tags)
93
+ ```
94
+
95
+ <!--GITHUB-->
96
+ PayPal link: [paypal.me/ttscoff](https://paypal.me/ttscoff)
97
+
98
+ ## Changelog
99
+
100
+ See [CHANGELOG.md](https://github.com/ttscoff/curlyq/blob/main/CHANGELOG.md)
101
+ <!--END GITHUB--><!--END README-->
@@ -0,0 +1,14 @@
1
+ require_relative "test_helper"
2
+
3
# Placeholder test case; it defines a single always-passing test.
class DefaultTest < Minitest::Test
  # No fixtures to prepare
  def setup; end

  # Nothing to clean up
  def teardown; end

  # Trivial assertion that always passes
  def test_the_truth
    assert(true)
  end
end
@@ -0,0 +1,4 @@
1
+ require "minitest/autorun"
2
+
3
+ # Add test libraries you want to use here, e.g. mocha
4
+ # Add helper classes or methods here, too