sinew 2.0.3 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/test.yml +26 -0
  3. data/.gitignore +3 -5
  4. data/.rubocop.yml +31 -46
  5. data/Gemfile +9 -0
  6. data/Gemfile.lock +124 -0
  7. data/README.md +146 -81
  8. data/Rakefile +36 -20
  9. data/bin/sinew +13 -39
  10. data/lib/sinew.rb +23 -10
  11. data/lib/sinew/args.rb +53 -0
  12. data/lib/sinew/base.rb +251 -0
  13. data/lib/sinew/csv.rb +89 -0
  14. data/lib/sinew/main.rb +45 -98
  15. data/lib/sinew/middleware/log_formatter.rb +23 -0
  16. data/lib/sinew/nokogiri_ext.rb +12 -21
  17. data/lib/sinew/response.rb +39 -99
  18. data/lib/sinew/version.rb +1 -1
  19. data/sample.rb +13 -0
  20. data/sample.sinew +4 -4
  21. data/sinew.gemspec +26 -25
  22. metadata +46 -108
  23. data/.travis.yml +0 -4
  24. data/.vscode/extensions.json +0 -3
  25. data/.vscode/settings.json +0 -15
  26. data/lib/sinew/cache.rb +0 -79
  27. data/lib/sinew/core_ext.rb +0 -59
  28. data/lib/sinew/dsl.rb +0 -114
  29. data/lib/sinew/output.rb +0 -149
  30. data/lib/sinew/request.rb +0 -151
  31. data/lib/sinew/runtime_options.rb +0 -28
  32. data/test/legacy/eu.httpbin.org/head/redirect,3 +0 -51
  33. data/test/legacy/eu.httpbin.org/head/status,500 +0 -1
  34. data/test/legacy/eu.httpbin.org/redirect,3 +0 -11
  35. data/test/legacy/eu.httpbin.org/status,500 +0 -1
  36. data/test/legacy/legacy.sinew +0 -2
  37. data/test/recipes/array_header.sinew +0 -6
  38. data/test/recipes/basic.sinew +0 -8
  39. data/test/recipes/dups.sinew +0 -7
  40. data/test/recipes/implicit_header.sinew +0 -5
  41. data/test/recipes/limit.sinew +0 -11
  42. data/test/recipes/noko.sinew +0 -9
  43. data/test/recipes/uri.sinew +0 -11
  44. data/test/recipes/xml.sinew +0 -8
  45. data/test/test.html +0 -45
  46. data/test/test_cache.rb +0 -69
  47. data/test/test_helper.rb +0 -123
  48. data/test/test_legacy.rb +0 -23
  49. data/test/test_main.rb +0 -34
  50. data/test/test_nokogiri_ext.rb +0 -18
  51. data/test/test_output.rb +0 -56
  52. data/test/test_recipes.rb +0 -60
  53. data/test/test_requests.rb +0 -135
  54. data/test/test_utf8.rb +0 -39
data/lib/sinew/core_ext.rb DELETED
@@ -1,59 +0,0 @@
1
- #
2
- # A few core extensions brought over from ActiveSupport. These are handy for
3
- # parsing.
4
- #
5
-
6
- class String
7
- def squish
8
- dup.squish!
9
- end
10
-
11
- def squish!
12
- strip!
13
- gsub!(/\s+/, ' ')
14
- self
15
- end
16
-
17
- def first(limit = 1)
18
- if limit == 0
19
- ''
20
- elsif limit >= size
21
- dup
22
- else
23
- self[0..limit - 1]
24
- end
25
- end
26
-
27
- def last(limit = 1)
28
- if limit == 0
29
- ''
30
- elsif limit >= size
31
- dup
32
- else
33
- self[-limit..-1]
34
- end
35
- end
36
-
37
- alias starts_with? start_with?
38
- alias ends_with? end_with?
39
- end
40
-
41
- #
42
- # blank?/present?
43
- #
44
-
45
- class Object
46
- def blank?
47
- respond_to?(:empty?) ? !!empty? : !self
48
- end
49
-
50
- def present?
51
- !blank?
52
- end
53
- end
54
-
55
- class String
56
- def blank?
57
- !!(self =~ /\A\s*\z/)
58
- end
59
- end
data/lib/sinew/dsl.rb DELETED
@@ -1,114 +0,0 @@
1
- require 'awesome_print'
2
- require 'cgi'
3
-
4
- #
5
- # The DSL available to .sinew files.
6
- #
7
-
8
- module Sinew
9
- class DSL
10
- # this is used to break out of --limit
11
- class LimitError < StandardError; end
12
-
13
- attr_reader :sinew, :raw, :uri, :elapsed
14
-
15
- def initialize(sinew)
16
- @sinew = sinew
17
- end
18
-
19
- def run
20
- tm = Time.now
21
- begin
22
- recipe = sinew.options[:recipe]
23
- instance_eval(File.read(recipe, mode: 'rb'), recipe)
24
- rescue LimitError
25
- # ignore - this is flow control for --limit
26
- end
27
- @elapsed = Time.now - tm
28
- end
29
-
30
- #
31
- # request
32
- #
33
-
34
- def get(url, query = {})
35
- http('get', url, query: query)
36
- end
37
-
38
- def post(url, form = {})
39
- body = form
40
- headers = {
41
- 'Content-Type' => 'application/x-www-form-urlencoded',
42
- }
43
- http('post', url, body: body, headers: headers)
44
- end
45
-
46
- def post_json(url, json = {})
47
- body = json.to_json
48
- headers = {
49
- 'Content-Type' => 'application/json',
50
- }
51
- http('post', url, body: body, headers: headers)
52
- end
53
-
54
- def http(method, url, options = {})
55
- # these need to be cleared before each request
56
- %i[@html @noko @xml @json].each do |i|
57
- instance_variable_set(i, nil)
58
- end
59
-
60
- # fetch and make response available to callers
61
- response = sinew.http(method, url, options)
62
- @uri, @raw = response.uri, response.body
63
-
64
- # don't confuse the user
65
- nil
66
- end
67
-
68
- #
69
- # response
70
- #
71
-
72
- def html
73
- @html ||= begin
74
- s = raw.dup
75
- # squish!
76
- s.squish!
77
- # kill whitespace around tags
78
- s.gsub!(/ ?<([^>]+)> ?/, '<\\1>')
79
- s
80
- end
81
- end
82
-
83
- def noko
84
- @noko ||= Nokogiri::HTML(html)
85
- end
86
-
87
- def xml
88
- @xml ||= Nokogiri::XML(html)
89
- end
90
-
91
- def json
92
- @json ||= JSON.parse(raw, symbolize_names: true)
93
- end
94
-
95
- def url
96
- uri.to_s
97
- end
98
-
99
- #
100
- # csv
101
- #
102
-
103
- def csv_header(*args)
104
- sinew.output.header(args)
105
- end
106
-
107
- def csv_emit(row)
108
- sinew.output.emit(row)
109
- if sinew.output.count == sinew.options[:limit]
110
- raise LimitError.new
111
- end
112
- end
113
- end
114
- end
data/lib/sinew/output.rb DELETED
@@ -1,149 +0,0 @@
1
- require 'csv'
2
- require 'set'
3
- require 'stringex'
4
-
5
- #
6
- # Stringex customizations
7
- #
8
-
9
- # turn '&amp;' into '&', not 'and'
10
- Stringex::Localization::DefaultConversions::HTML_ENTITIES[:amp] = '&'
11
-
12
- #
13
- # CSV output.
14
- #
15
-
16
- module Sinew
17
- class Output
18
- attr_reader :sinew, :columns, :rows, :urls, :csv
19
-
20
- def initialize(sinew)
21
- @sinew = sinew
22
- @rows = []
23
- @urls = Set.new
24
- end
25
-
26
- def filename
27
- @filename ||= begin
28
- recipe = sinew.options[:recipe]
29
- ext = File.extname(recipe)
30
- if ext.empty?
31
- "#{recipe}.csv"
32
- else
33
- recipe.gsub(ext, '.csv')
34
- end
35
- end
36
- end
37
-
38
- def header(columns)
39
- sinew.banner("Writing to #{filename}...") if !sinew.quiet?
40
-
41
- columns = columns.flatten
42
- @columns = columns
43
-
44
- # open csv, write header row
45
- @csv = CSV.open(filename, 'wb')
46
- csv << columns
47
- end
48
-
49
- def emit(row)
50
- # implicit header if necessary
51
- header(row.keys) if !csv
52
-
53
- # don't allow duplicate urls
54
- return if dup_url?(row)
55
- rows << row.dup
56
-
57
- # map columns to row, and normalize along the way
58
- print = {}
59
- row = columns.map do |i|
60
- value = normalize(row[i])
61
- print[i] = value if value.present?
62
- value
63
- end
64
-
65
- # print
66
- sinew.vputs print.ai
67
-
68
- csv << row
69
- csv.flush
70
- end
71
-
72
- def count
73
- rows.length
74
- end
75
-
76
- def report
77
- return if count == 0
78
-
79
- sinew.banner("Got #{count} rows.")
80
-
81
- # calculate counts
82
- counts = Hash.new(0)
83
- rows.each do |row|
84
- row.each_pair { |k, v| counts[k] += 1 if v.present? }
85
- end
86
- # sort by counts
87
- cols = columns.sort_by { |i| [ -counts[i], i ] }
88
-
89
- # report
90
- len = cols.map { |i| i.to_s.length }.max
91
- fmt = " %-#{len + 1}s %7d / %-7d %6.1f%%\n"
92
- cols.each do |col|
93
- $stderr.printf(fmt, col, counts[col], count, counts[col] * 100.0 / count)
94
- end
95
- end
96
-
97
- def normalize(s)
98
- # noko/array/misc => string
99
- s = case s
100
- when Nokogiri::XML::Element, Nokogiri::XML::NodeSet
101
- s.inner_html
102
- when Array
103
- s.map(&:to_s).join('|')
104
- else
105
- s.to_s
106
- end
107
-
108
- # strip html tags. Note that we replace tags with spaces
109
- s = s.gsub(/<[^>]+>/, ' ')
110
-
111
- #
112
- # Below uses stringex
113
- #
114
- # github.com/rsl/stringex/blob/master/lib/stringex/string_extensions.rb
115
- # github.com/rsl/stringex/blob/master/lib/stringex/localization/conversion_expressions.rb
116
- #
117
-
118
- # Converts MS Word 'smart punctuation' to ASCII
119
- s = s.convert_smart_punctuation
120
-
121
- # "&aacute;".convert_accented_html_entities # => "a"
122
- s = s.convert_accented_html_entities
123
-
124
- # &amp, &frac, etc.
125
- s = s.convert_miscellaneous_html_entities
126
-
127
- # convert unicode => regular characters
128
- s = s.to_ascii
129
-
130
- # squish
131
- s = s.squish
132
-
133
- s
134
- end
135
- protected :normalize
136
-
137
- def dup_url?(row)
138
- if url = row[:url]
139
- if urls.include?(url)
140
- sinew.warning("duplicate url: #{url}") if !sinew.quiet?
141
- return true
142
- end
143
- urls << url
144
- end
145
- false
146
- end
147
- protected :dup_url?
148
- end
149
- end
data/lib/sinew/request.rb DELETED
@@ -1,151 +0,0 @@
1
- require 'digest/md5'
2
- require 'httparty'
3
- require 'htmlentities'
4
-
5
- #
6
- # Process a single HTTP request. Mostly a wrapper around HTTParty.
7
- #
8
-
9
- module Sinew
10
- class Error < StandardError; end
11
-
12
- class Request
13
- HTML_ENTITIES = HTMLEntities.new
14
- VALID_METHODS = %w[get post patch put delete head options].freeze
15
-
16
- attr_reader :sinew, :method, :uri, :options, :cache_key
17
-
18
- # Options are largely compatible with HTTParty, except for :method.
19
- def initialize(sinew, method, url, options = {})
20
- @sinew = sinew
21
- @method = method
22
- @options = options.dup
23
- @uri = parse_url(url)
24
- @cache_key = calculate_cache_key
25
- end
26
-
27
- # run the request, return the result
28
- def perform
29
- validate!
30
-
31
- # merge optons
32
- options = self.options.merge(sinew.runtime_options.httparty_options)
33
-
34
- # merge headers
35
- headers = sinew.runtime_options.headers
36
- headers = headers.merge(options[:headers]) if options[:headers]
37
- options[:headers] = headers
38
-
39
- party_response = HTTParty.send(method, uri, options)
40
- Response.from_network(self, party_response)
41
- end
42
-
43
- # We accept sloppy urls and attempt to clean them up
44
- def parse_url(url)
45
- s = url
46
-
47
- # remove entities
48
- s = HTML_ENTITIES.decode(s)
49
-
50
- # fix a couple of common encoding bugs
51
- s = s.gsub(' ', '%20')
52
- s = s.gsub("'", '%27')
53
-
54
- # append query manually (instead of letting HTTParty handle it) so we can
55
- # include it in cache_key
56
- query = options.delete(:query)
57
- if query.present?
58
- q = HTTParty::HashConversions.to_params(query)
59
- separator = s.include?('?') ? '&' : '?'
60
- s = "#{s}#{separator}#{q}"
61
- end
62
-
63
- URI.parse(s)
64
- end
65
- protected :parse_url
66
-
67
- def calculate_cache_key
68
- dir = pathify(uri.host)
69
-
70
- body_key = if body.is_a?(Hash)
71
- HTTParty::HashConversions.to_params(body)
72
- else
73
- body&.dup
74
- end
75
-
76
- # build key, as a hash for before_generate_cache_key
77
- key = {
78
- method: method.dup,
79
- path: uri.path,
80
- query: uri.query,
81
- body: body_key,
82
- }
83
- key = sinew.runtime_options.before_generate_cache_key.call(key)
84
-
85
- # strip method for gets
86
- key.delete(:method) if key[:method] == 'get'
87
-
88
- # pull out the values, join and pathify
89
- path = key.values.select(&:present?).join(',')
90
- path = pathify(path)
91
-
92
- # shorten long paths
93
- if path.length > 250
94
- path = Digest::MD5.hexdigest(path)
95
- end
96
-
97
- "#{dir}/#{path}"
98
- end
99
- protected :calculate_cache_key
100
-
101
- def validate!
102
- raise "invalid method #{method}" if !VALID_METHODS.include?(method)
103
- raise "invalid url #{uri}" if uri.scheme !~ /^http/
104
- raise "can't get with a body" if method == 'get' && body
105
- raise "Content-Type doesn't make sense without a body" if content_type && !body
106
- end
107
- protected :validate!
108
-
109
- def body
110
- options[:body]
111
- end
112
- protected :body
113
-
114
- def headers
115
- options[:headers]
116
- end
117
- protected :headers
118
-
119
- def content_type
120
- headers && headers['Content-Type']
121
- end
122
- protected :content_type
123
-
124
- def form?
125
- content_type == 'application/x-www-form-urlencoded'
126
- end
127
- protected :form?
128
-
129
- def pathify(s)
130
- # remove leading slash
131
- s = s.gsub(/^\//, '')
132
- # .. => comma
133
- s = s.gsub('..', ',')
134
- # query separators => comma
135
- s = s.gsub(/[?\/&]/, ',')
136
- # ,, => comma
137
- s = s.gsub(',,', ',')
138
- # encode invalid path chars
139
- s = s.gsub(/[^A-Za-z0-9_.,=-]/) do |i|
140
- hex = i.unpack('H2').first
141
- "%#{hex}"
142
- end
143
- # handle empty case
144
- s = '_root_' if s.blank?
145
- # always downcase
146
- s = s.downcase
147
- s
148
- end
149
- protected :pathify
150
- end
151
- end