sinew 2.0.3 → 4.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/test.yml +26 -0
  3. data/.gitignore +3 -5
  4. data/.rubocop.yml +31 -46
  5. data/Gemfile +9 -0
  6. data/Gemfile.lock +124 -0
  7. data/README.md +146 -81
  8. data/Rakefile +36 -20
  9. data/bin/sinew +13 -39
  10. data/lib/sinew.rb +23 -10
  11. data/lib/sinew/args.rb +53 -0
  12. data/lib/sinew/base.rb +251 -0
  13. data/lib/sinew/csv.rb +89 -0
  14. data/lib/sinew/main.rb +45 -98
  15. data/lib/sinew/middleware/log_formatter.rb +23 -0
  16. data/lib/sinew/nokogiri_ext.rb +12 -21
  17. data/lib/sinew/response.rb +39 -99
  18. data/lib/sinew/version.rb +1 -1
  19. data/sample.rb +13 -0
  20. data/sample.sinew +4 -4
  21. data/sinew.gemspec +26 -25
  22. metadata +46 -108
  23. data/.travis.yml +0 -4
  24. data/.vscode/extensions.json +0 -3
  25. data/.vscode/settings.json +0 -15
  26. data/lib/sinew/cache.rb +0 -79
  27. data/lib/sinew/core_ext.rb +0 -59
  28. data/lib/sinew/dsl.rb +0 -114
  29. data/lib/sinew/output.rb +0 -149
  30. data/lib/sinew/request.rb +0 -151
  31. data/lib/sinew/runtime_options.rb +0 -28
  32. data/test/legacy/eu.httpbin.org/head/redirect,3 +0 -51
  33. data/test/legacy/eu.httpbin.org/head/status,500 +0 -1
  34. data/test/legacy/eu.httpbin.org/redirect,3 +0 -11
  35. data/test/legacy/eu.httpbin.org/status,500 +0 -1
  36. data/test/legacy/legacy.sinew +0 -2
  37. data/test/recipes/array_header.sinew +0 -6
  38. data/test/recipes/basic.sinew +0 -8
  39. data/test/recipes/dups.sinew +0 -7
  40. data/test/recipes/implicit_header.sinew +0 -5
  41. data/test/recipes/limit.sinew +0 -11
  42. data/test/recipes/noko.sinew +0 -9
  43. data/test/recipes/uri.sinew +0 -11
  44. data/test/recipes/xml.sinew +0 -8
  45. data/test/test.html +0 -45
  46. data/test/test_cache.rb +0 -69
  47. data/test/test_helper.rb +0 -123
  48. data/test/test_legacy.rb +0 -23
  49. data/test/test_main.rb +0 -34
  50. data/test/test_nokogiri_ext.rb +0 -18
  51. data/test/test_output.rb +0 -56
  52. data/test/test_recipes.rb +0 -60
  53. data/test/test_requests.rb +0 -135
  54. data/test/test_utf8.rb +0 -39
@@ -1,59 +0,0 @@
1
- #
2
- # A few core extensions brought over from ActiveSupport. These are handy for
3
- # parsing.
4
- #
5
-
6
- class String
7
- def squish
8
- dup.squish!
9
- end
10
-
11
- def squish!
12
- strip!
13
- gsub!(/\s+/, ' ')
14
- self
15
- end
16
-
17
- def first(limit = 1)
18
- if limit == 0
19
- ''
20
- elsif limit >= size
21
- dup
22
- else
23
- self[0..limit - 1]
24
- end
25
- end
26
-
27
- def last(limit = 1)
28
- if limit == 0
29
- ''
30
- elsif limit >= size
31
- dup
32
- else
33
- self[-limit..-1]
34
- end
35
- end
36
-
37
- alias starts_with? start_with?
38
- alias ends_with? end_with?
39
- end
40
-
41
- #
42
- # blank?/present?
43
- #
44
-
45
- class Object
46
- def blank?
47
- respond_to?(:empty?) ? !!empty? : !self
48
- end
49
-
50
- def present?
51
- !blank?
52
- end
53
- end
54
-
55
- class String
56
- def blank?
57
- !!(self =~ /\A\s*\z/)
58
- end
59
- end
data/lib/sinew/dsl.rb DELETED
@@ -1,114 +0,0 @@
1
- require 'awesome_print'
2
- require 'cgi'
3
-
4
- #
5
- # The DSL available to .sinew files.
6
- #
7
-
8
- module Sinew
9
- class DSL
10
- # this is used to break out of --limit
11
- class LimitError < StandardError; end
12
-
13
- attr_reader :sinew, :raw, :uri, :elapsed
14
-
15
- def initialize(sinew)
16
- @sinew = sinew
17
- end
18
-
19
- def run
20
- tm = Time.now
21
- begin
22
- recipe = sinew.options[:recipe]
23
- instance_eval(File.read(recipe, mode: 'rb'), recipe)
24
- rescue LimitError
25
- # ignore - this is flow control for --limit
26
- end
27
- @elapsed = Time.now - tm
28
- end
29
-
30
- #
31
- # request
32
- #
33
-
34
- def get(url, query = {})
35
- http('get', url, query: query)
36
- end
37
-
38
- def post(url, form = {})
39
- body = form
40
- headers = {
41
- 'Content-Type' => 'application/x-www-form-urlencoded',
42
- }
43
- http('post', url, body: body, headers: headers)
44
- end
45
-
46
- def post_json(url, json = {})
47
- body = json.to_json
48
- headers = {
49
- 'Content-Type' => 'application/json',
50
- }
51
- http('post', url, body: body, headers: headers)
52
- end
53
-
54
- def http(method, url, options = {})
55
- # these need to be cleared before each request
56
- %i[@html @noko @xml @json].each do |i|
57
- instance_variable_set(i, nil)
58
- end
59
-
60
- # fetch and make response available to callers
61
- response = sinew.http(method, url, options)
62
- @uri, @raw = response.uri, response.body
63
-
64
- # don't confuse the user
65
- nil
66
- end
67
-
68
- #
69
- # response
70
- #
71
-
72
- def html
73
- @html ||= begin
74
- s = raw.dup
75
- # squish!
76
- s.squish!
77
- # kill whitespace around tags
78
- s.gsub!(/ ?<([^>]+)> ?/, '<\\1>')
79
- s
80
- end
81
- end
82
-
83
- def noko
84
- @noko ||= Nokogiri::HTML(html)
85
- end
86
-
87
- def xml
88
- @xml ||= Nokogiri::XML(html)
89
- end
90
-
91
- def json
92
- @json ||= JSON.parse(raw, symbolize_names: true)
93
- end
94
-
95
- def url
96
- uri.to_s
97
- end
98
-
99
- #
100
- # csv
101
- #
102
-
103
- def csv_header(*args)
104
- sinew.output.header(args)
105
- end
106
-
107
- def csv_emit(row)
108
- sinew.output.emit(row)
109
- if sinew.output.count == sinew.options[:limit]
110
- raise LimitError.new
111
- end
112
- end
113
- end
114
- end
data/lib/sinew/output.rb DELETED
@@ -1,149 +0,0 @@
1
- require 'csv'
2
- require 'set'
3
- require 'stringex'
4
-
5
- #
6
- # Stringex customizations
7
- #
8
-
9
- # turn '&amp;' into '&', not 'and'
10
- Stringex::Localization::DefaultConversions::HTML_ENTITIES[:amp] = '&'
11
-
12
- #
13
- # CSV output.
14
- #
15
-
16
- module Sinew
17
- class Output
18
- attr_reader :sinew, :columns, :rows, :urls, :csv
19
-
20
- def initialize(sinew)
21
- @sinew = sinew
22
- @rows = []
23
- @urls = Set.new
24
- end
25
-
26
- def filename
27
- @filename ||= begin
28
- recipe = sinew.options[:recipe]
29
- ext = File.extname(recipe)
30
- if ext.empty?
31
- "#{recipe}.csv"
32
- else
33
- recipe.gsub(ext, '.csv')
34
- end
35
- end
36
- end
37
-
38
- def header(columns)
39
- sinew.banner("Writing to #{filename}...") if !sinew.quiet?
40
-
41
- columns = columns.flatten
42
- @columns = columns
43
-
44
- # open csv, write header row
45
- @csv = CSV.open(filename, 'wb')
46
- csv << columns
47
- end
48
-
49
- def emit(row)
50
- # implicit header if necessary
51
- header(row.keys) if !csv
52
-
53
- # don't allow duplicate urls
54
- return if dup_url?(row)
55
- rows << row.dup
56
-
57
- # map columns to row, and normalize along the way
58
- print = {}
59
- row = columns.map do |i|
60
- value = normalize(row[i])
61
- print[i] = value if value.present?
62
- value
63
- end
64
-
65
- # print
66
- sinew.vputs print.ai
67
-
68
- csv << row
69
- csv.flush
70
- end
71
-
72
- def count
73
- rows.length
74
- end
75
-
76
- def report
77
- return if count == 0
78
-
79
- sinew.banner("Got #{count} rows.")
80
-
81
- # calculate counts
82
- counts = Hash.new(0)
83
- rows.each do |row|
84
- row.each_pair { |k, v| counts[k] += 1 if v.present? }
85
- end
86
- # sort by counts
87
- cols = columns.sort_by { |i| [ -counts[i], i ] }
88
-
89
- # report
90
- len = cols.map { |i| i.to_s.length }.max
91
- fmt = " %-#{len + 1}s %7d / %-7d %6.1f%%\n"
92
- cols.each do |col|
93
- $stderr.printf(fmt, col, counts[col], count, counts[col] * 100.0 / count)
94
- end
95
- end
96
-
97
- def normalize(s)
98
- # noko/array/misc => string
99
- s = case s
100
- when Nokogiri::XML::Element, Nokogiri::XML::NodeSet
101
- s.inner_html
102
- when Array
103
- s.map(&:to_s).join('|')
104
- else
105
- s.to_s
106
- end
107
-
108
- # strip html tags. Note that we replace tags with spaces
109
- s = s.gsub(/<[^>]+>/, ' ')
110
-
111
- #
112
- # Below uses stringex
113
- #
114
- # github.com/rsl/stringex/blob/master/lib/stringex/string_extensions.rb
115
- # github.com/rsl/stringex/blob/master/lib/stringex/localization/conversion_expressions.rb
116
- #
117
-
118
- # Converts MS Word 'smart punctuation' to ASCII
119
- s = s.convert_smart_punctuation
120
-
121
- # "&aacute;".convert_accented_html_entities # => "a"
122
- s = s.convert_accented_html_entities
123
-
124
- # &amp, &frac, etc.
125
- s = s.convert_miscellaneous_html_entities
126
-
127
- # convert unicode => regular characters
128
- s = s.to_ascii
129
-
130
- # squish
131
- s = s.squish
132
-
133
- s
134
- end
135
- protected :normalize
136
-
137
- def dup_url?(row)
138
- if url = row[:url]
139
- if urls.include?(url)
140
- sinew.warning("duplicate url: #{url}") if !sinew.quiet?
141
- return true
142
- end
143
- urls << url
144
- end
145
- false
146
- end
147
- protected :dup_url?
148
- end
149
- end
data/lib/sinew/request.rb DELETED
@@ -1,151 +0,0 @@
1
- require 'digest/md5'
2
- require 'httparty'
3
- require 'htmlentities'
4
-
5
- #
6
- # Process a single HTTP request. Mostly a wrapper around HTTParty.
7
- #
8
-
9
- module Sinew
10
- class Error < StandardError; end
11
-
12
- class Request
13
- HTML_ENTITIES = HTMLEntities.new
14
- VALID_METHODS = %w[get post patch put delete head options].freeze
15
-
16
- attr_reader :sinew, :method, :uri, :options, :cache_key
17
-
18
- # Options are largely compatible with HTTParty, except for :method.
19
- def initialize(sinew, method, url, options = {})
20
- @sinew = sinew
21
- @method = method
22
- @options = options.dup
23
- @uri = parse_url(url)
24
- @cache_key = calculate_cache_key
25
- end
26
-
27
- # run the request, return the result
28
- def perform
29
- validate!
30
-
31
- # merge optons
32
- options = self.options.merge(sinew.runtime_options.httparty_options)
33
-
34
- # merge headers
35
- headers = sinew.runtime_options.headers
36
- headers = headers.merge(options[:headers]) if options[:headers]
37
- options[:headers] = headers
38
-
39
- party_response = HTTParty.send(method, uri, options)
40
- Response.from_network(self, party_response)
41
- end
42
-
43
- # We accept sloppy urls and attempt to clean them up
44
- def parse_url(url)
45
- s = url
46
-
47
- # remove entities
48
- s = HTML_ENTITIES.decode(s)
49
-
50
- # fix a couple of common encoding bugs
51
- s = s.gsub(' ', '%20')
52
- s = s.gsub("'", '%27')
53
-
54
- # append query manually (instead of letting HTTParty handle it) so we can
55
- # include it in cache_key
56
- query = options.delete(:query)
57
- if query.present?
58
- q = HTTParty::HashConversions.to_params(query)
59
- separator = s.include?('?') ? '&' : '?'
60
- s = "#{s}#{separator}#{q}"
61
- end
62
-
63
- URI.parse(s)
64
- end
65
- protected :parse_url
66
-
67
- def calculate_cache_key
68
- dir = pathify(uri.host)
69
-
70
- body_key = if body.is_a?(Hash)
71
- HTTParty::HashConversions.to_params(body)
72
- else
73
- body&.dup
74
- end
75
-
76
- # build key, as a hash for before_generate_cache_key
77
- key = {
78
- method: method.dup,
79
- path: uri.path,
80
- query: uri.query,
81
- body: body_key,
82
- }
83
- key = sinew.runtime_options.before_generate_cache_key.call(key)
84
-
85
- # strip method for gets
86
- key.delete(:method) if key[:method] == 'get'
87
-
88
- # pull out the values, join and pathify
89
- path = key.values.select(&:present?).join(',')
90
- path = pathify(path)
91
-
92
- # shorten long paths
93
- if path.length > 250
94
- path = Digest::MD5.hexdigest(path)
95
- end
96
-
97
- "#{dir}/#{path}"
98
- end
99
- protected :calculate_cache_key
100
-
101
- def validate!
102
- raise "invalid method #{method}" if !VALID_METHODS.include?(method)
103
- raise "invalid url #{uri}" if uri.scheme !~ /^http/
104
- raise "can't get with a body" if method == 'get' && body
105
- raise "Content-Type doesn't make sense without a body" if content_type && !body
106
- end
107
- protected :validate!
108
-
109
- def body
110
- options[:body]
111
- end
112
- protected :body
113
-
114
- def headers
115
- options[:headers]
116
- end
117
- protected :headers
118
-
119
- def content_type
120
- headers && headers['Content-Type']
121
- end
122
- protected :content_type
123
-
124
- def form?
125
- content_type == 'application/x-www-form-urlencoded'
126
- end
127
- protected :form?
128
-
129
- def pathify(s)
130
- # remove leading slash
131
- s = s.gsub(/^\//, '')
132
- # .. => comma
133
- s = s.gsub('..', ',')
134
- # query separators => comma
135
- s = s.gsub(/[?\/&]/, ',')
136
- # ,, => comma
137
- s = s.gsub(',,', ',')
138
- # encode invalid path chars
139
- s = s.gsub(/[^A-Za-z0-9_.,=-]/) do |i|
140
- hex = i.unpack('H2').first
141
- "%#{hex}"
142
- end
143
- # handle empty case
144
- s = '_root_' if s.blank?
145
- # always downcase
146
- s = s.downcase
147
- s
148
- end
149
- protected :pathify
150
- end
151
- end