sinew 2.0.3 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +26 -0
- data/.gitignore +3 -5
- data/.rubocop.yml +31 -46
- data/Gemfile +9 -0
- data/Gemfile.lock +124 -0
- data/README.md +146 -81
- data/Rakefile +36 -20
- data/bin/sinew +13 -39
- data/lib/sinew.rb +23 -10
- data/lib/sinew/args.rb +53 -0
- data/lib/sinew/base.rb +251 -0
- data/lib/sinew/csv.rb +89 -0
- data/lib/sinew/main.rb +45 -98
- data/lib/sinew/middleware/log_formatter.rb +23 -0
- data/lib/sinew/nokogiri_ext.rb +12 -21
- data/lib/sinew/response.rb +39 -99
- data/lib/sinew/version.rb +1 -1
- data/sample.rb +13 -0
- data/sample.sinew +4 -4
- data/sinew.gemspec +26 -25
- metadata +46 -108
- data/.travis.yml +0 -4
- data/.vscode/extensions.json +0 -3
- data/.vscode/settings.json +0 -15
- data/lib/sinew/cache.rb +0 -79
- data/lib/sinew/core_ext.rb +0 -59
- data/lib/sinew/dsl.rb +0 -114
- data/lib/sinew/output.rb +0 -149
- data/lib/sinew/request.rb +0 -151
- data/lib/sinew/runtime_options.rb +0 -28
- data/test/legacy/eu.httpbin.org/head/redirect,3 +0 -51
- data/test/legacy/eu.httpbin.org/head/status,500 +0 -1
- data/test/legacy/eu.httpbin.org/redirect,3 +0 -11
- data/test/legacy/eu.httpbin.org/status,500 +0 -1
- data/test/legacy/legacy.sinew +0 -2
- data/test/recipes/array_header.sinew +0 -6
- data/test/recipes/basic.sinew +0 -8
- data/test/recipes/dups.sinew +0 -7
- data/test/recipes/implicit_header.sinew +0 -5
- data/test/recipes/limit.sinew +0 -11
- data/test/recipes/noko.sinew +0 -9
- data/test/recipes/uri.sinew +0 -11
- data/test/recipes/xml.sinew +0 -8
- data/test/test.html +0 -45
- data/test/test_cache.rb +0 -69
- data/test/test_helper.rb +0 -123
- data/test/test_legacy.rb +0 -23
- data/test/test_main.rb +0 -34
- data/test/test_nokogiri_ext.rb +0 -18
- data/test/test_output.rb +0 -56
- data/test/test_recipes.rb +0 -60
- data/test/test_requests.rb +0 -135
- data/test/test_utf8.rb +0 -39
data/lib/sinew/core_ext.rb
DELETED
@@ -1,59 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# A few core extensions brought over from ActiveSupport. These are handy for
|
3
|
-
# parsing.
|
4
|
-
#
|
5
|
-
|
6
|
-
class String
|
7
|
-
def squish
|
8
|
-
dup.squish!
|
9
|
-
end
|
10
|
-
|
11
|
-
def squish!
|
12
|
-
strip!
|
13
|
-
gsub!(/\s+/, ' ')
|
14
|
-
self
|
15
|
-
end
|
16
|
-
|
17
|
-
def first(limit = 1)
|
18
|
-
if limit == 0
|
19
|
-
''
|
20
|
-
elsif limit >= size
|
21
|
-
dup
|
22
|
-
else
|
23
|
-
self[0..limit - 1]
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
def last(limit = 1)
|
28
|
-
if limit == 0
|
29
|
-
''
|
30
|
-
elsif limit >= size
|
31
|
-
dup
|
32
|
-
else
|
33
|
-
self[-limit..-1]
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
alias starts_with? start_with?
|
38
|
-
alias ends_with? end_with?
|
39
|
-
end
|
40
|
-
|
41
|
-
#
|
42
|
-
# blank?/present?
|
43
|
-
#
|
44
|
-
|
45
|
-
class Object
|
46
|
-
def blank?
|
47
|
-
respond_to?(:empty?) ? !!empty? : !self
|
48
|
-
end
|
49
|
-
|
50
|
-
def present?
|
51
|
-
!blank?
|
52
|
-
end
|
53
|
-
end
|
54
|
-
|
55
|
-
class String
|
56
|
-
def blank?
|
57
|
-
!!(self =~ /\A\s*\z/)
|
58
|
-
end
|
59
|
-
end
|
data/lib/sinew/dsl.rb
DELETED
@@ -1,114 +0,0 @@
|
|
1
|
-
require 'awesome_print'
|
2
|
-
require 'cgi'
|
3
|
-
|
4
|
-
#
|
5
|
-
# The DSL available to .sinew files.
|
6
|
-
#
|
7
|
-
|
8
|
-
module Sinew
|
9
|
-
class DSL
|
10
|
-
# this is used to break out of --limit
|
11
|
-
class LimitError < StandardError; end
|
12
|
-
|
13
|
-
attr_reader :sinew, :raw, :uri, :elapsed
|
14
|
-
|
15
|
-
def initialize(sinew)
|
16
|
-
@sinew = sinew
|
17
|
-
end
|
18
|
-
|
19
|
-
def run
|
20
|
-
tm = Time.now
|
21
|
-
begin
|
22
|
-
recipe = sinew.options[:recipe]
|
23
|
-
instance_eval(File.read(recipe, mode: 'rb'), recipe)
|
24
|
-
rescue LimitError
|
25
|
-
# ignore - this is flow control for --limit
|
26
|
-
end
|
27
|
-
@elapsed = Time.now - tm
|
28
|
-
end
|
29
|
-
|
30
|
-
#
|
31
|
-
# request
|
32
|
-
#
|
33
|
-
|
34
|
-
def get(url, query = {})
|
35
|
-
http('get', url, query: query)
|
36
|
-
end
|
37
|
-
|
38
|
-
def post(url, form = {})
|
39
|
-
body = form
|
40
|
-
headers = {
|
41
|
-
'Content-Type' => 'application/x-www-form-urlencoded',
|
42
|
-
}
|
43
|
-
http('post', url, body: body, headers: headers)
|
44
|
-
end
|
45
|
-
|
46
|
-
def post_json(url, json = {})
|
47
|
-
body = json.to_json
|
48
|
-
headers = {
|
49
|
-
'Content-Type' => 'application/json',
|
50
|
-
}
|
51
|
-
http('post', url, body: body, headers: headers)
|
52
|
-
end
|
53
|
-
|
54
|
-
def http(method, url, options = {})
|
55
|
-
# these need to be cleared before each request
|
56
|
-
%i[@html @noko @xml @json].each do |i|
|
57
|
-
instance_variable_set(i, nil)
|
58
|
-
end
|
59
|
-
|
60
|
-
# fetch and make response available to callers
|
61
|
-
response = sinew.http(method, url, options)
|
62
|
-
@uri, @raw = response.uri, response.body
|
63
|
-
|
64
|
-
# don't confuse the user
|
65
|
-
nil
|
66
|
-
end
|
67
|
-
|
68
|
-
#
|
69
|
-
# response
|
70
|
-
#
|
71
|
-
|
72
|
-
def html
|
73
|
-
@html ||= begin
|
74
|
-
s = raw.dup
|
75
|
-
# squish!
|
76
|
-
s.squish!
|
77
|
-
# kill whitespace around tags
|
78
|
-
s.gsub!(/ ?<([^>]+)> ?/, '<\\1>')
|
79
|
-
s
|
80
|
-
end
|
81
|
-
end
|
82
|
-
|
83
|
-
def noko
|
84
|
-
@noko ||= Nokogiri::HTML(html)
|
85
|
-
end
|
86
|
-
|
87
|
-
def xml
|
88
|
-
@xml ||= Nokogiri::XML(html)
|
89
|
-
end
|
90
|
-
|
91
|
-
def json
|
92
|
-
@json ||= JSON.parse(raw, symbolize_names: true)
|
93
|
-
end
|
94
|
-
|
95
|
-
def url
|
96
|
-
uri.to_s
|
97
|
-
end
|
98
|
-
|
99
|
-
#
|
100
|
-
# csv
|
101
|
-
#
|
102
|
-
|
103
|
-
def csv_header(*args)
|
104
|
-
sinew.output.header(args)
|
105
|
-
end
|
106
|
-
|
107
|
-
def csv_emit(row)
|
108
|
-
sinew.output.emit(row)
|
109
|
-
if sinew.output.count == sinew.options[:limit]
|
110
|
-
raise LimitError.new
|
111
|
-
end
|
112
|
-
end
|
113
|
-
end
|
114
|
-
end
|
data/lib/sinew/output.rb
DELETED
@@ -1,149 +0,0 @@
|
|
1
|
-
require 'csv'
|
2
|
-
require 'set'
|
3
|
-
require 'stringex'
|
4
|
-
|
5
|
-
#
|
6
|
-
# Stringex customizations
|
7
|
-
#
|
8
|
-
|
9
|
-
# turn '&amp;' into '&', not 'and'
|
10
|
-
Stringex::Localization::DefaultConversions::HTML_ENTITIES[:amp] = '&'
|
11
|
-
|
12
|
-
#
|
13
|
-
# CSV output.
|
14
|
-
#
|
15
|
-
|
16
|
-
module Sinew
|
17
|
-
class Output
|
18
|
-
attr_reader :sinew, :columns, :rows, :urls, :csv
|
19
|
-
|
20
|
-
def initialize(sinew)
|
21
|
-
@sinew = sinew
|
22
|
-
@rows = []
|
23
|
-
@urls = Set.new
|
24
|
-
end
|
25
|
-
|
26
|
-
def filename
|
27
|
-
@filename ||= begin
|
28
|
-
recipe = sinew.options[:recipe]
|
29
|
-
ext = File.extname(recipe)
|
30
|
-
if ext.empty?
|
31
|
-
"#{recipe}.csv"
|
32
|
-
else
|
33
|
-
recipe.gsub(ext, '.csv')
|
34
|
-
end
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
def header(columns)
|
39
|
-
sinew.banner("Writing to #{filename}...") if !sinew.quiet?
|
40
|
-
|
41
|
-
columns = columns.flatten
|
42
|
-
@columns = columns
|
43
|
-
|
44
|
-
# open csv, write header row
|
45
|
-
@csv = CSV.open(filename, 'wb')
|
46
|
-
csv << columns
|
47
|
-
end
|
48
|
-
|
49
|
-
def emit(row)
|
50
|
-
# implicit header if necessary
|
51
|
-
header(row.keys) if !csv
|
52
|
-
|
53
|
-
# don't allow duplicate urls
|
54
|
-
return if dup_url?(row)
|
55
|
-
rows << row.dup
|
56
|
-
|
57
|
-
# map columns to row, and normalize along the way
|
58
|
-
print = {}
|
59
|
-
row = columns.map do |i|
|
60
|
-
value = normalize(row[i])
|
61
|
-
print[i] = value if value.present?
|
62
|
-
value
|
63
|
-
end
|
64
|
-
|
65
|
-
# print
|
66
|
-
sinew.vputs print.ai
|
67
|
-
|
68
|
-
csv << row
|
69
|
-
csv.flush
|
70
|
-
end
|
71
|
-
|
72
|
-
def count
|
73
|
-
rows.length
|
74
|
-
end
|
75
|
-
|
76
|
-
def report
|
77
|
-
return if count == 0
|
78
|
-
|
79
|
-
sinew.banner("Got #{count} rows.")
|
80
|
-
|
81
|
-
# calculate counts
|
82
|
-
counts = Hash.new(0)
|
83
|
-
rows.each do |row|
|
84
|
-
row.each_pair { |k, v| counts[k] += 1 if v.present? }
|
85
|
-
end
|
86
|
-
# sort by counts
|
87
|
-
cols = columns.sort_by { |i| [ -counts[i], i ] }
|
88
|
-
|
89
|
-
# report
|
90
|
-
len = cols.map { |i| i.to_s.length }.max
|
91
|
-
fmt = " %-#{len + 1}s %7d / %-7d %6.1f%%\n"
|
92
|
-
cols.each do |col|
|
93
|
-
$stderr.printf(fmt, col, counts[col], count, counts[col] * 100.0 / count)
|
94
|
-
end
|
95
|
-
end
|
96
|
-
|
97
|
-
def normalize(s)
|
98
|
-
# noko/array/misc => string
|
99
|
-
s = case s
|
100
|
-
when Nokogiri::XML::Element, Nokogiri::XML::NodeSet
|
101
|
-
s.inner_html
|
102
|
-
when Array
|
103
|
-
s.map(&:to_s).join('|')
|
104
|
-
else
|
105
|
-
s.to_s
|
106
|
-
end
|
107
|
-
|
108
|
-
# strip html tags. Note that we replace tags with spaces
|
109
|
-
s = s.gsub(/<[^>]+>/, ' ')
|
110
|
-
|
111
|
-
#
|
112
|
-
# Below uses stringex
|
113
|
-
#
|
114
|
-
# github.com/rsl/stringex/blob/master/lib/stringex/string_extensions.rb
|
115
|
-
# github.com/rsl/stringex/blob/master/lib/stringex/localization/conversion_expressions.rb
|
116
|
-
#
|
117
|
-
|
118
|
-
# Converts MS Word 'smart punctuation' to ASCII
|
119
|
-
s = s.convert_smart_punctuation
|
120
|
-
|
121
|
-
# "&aacute;".convert_accented_html_entities # => "a"
|
122
|
-
s = s.convert_accented_html_entities
|
123
|
-
|
124
|
-
# &amp;, &frac, etc.
|
125
|
-
s = s.convert_miscellaneous_html_entities
|
126
|
-
|
127
|
-
# convert unicode => regular characters
|
128
|
-
s = s.to_ascii
|
129
|
-
|
130
|
-
# squish
|
131
|
-
s = s.squish
|
132
|
-
|
133
|
-
s
|
134
|
-
end
|
135
|
-
protected :normalize
|
136
|
-
|
137
|
-
def dup_url?(row)
|
138
|
-
if url = row[:url]
|
139
|
-
if urls.include?(url)
|
140
|
-
sinew.warning("duplicate url: #{url}") if !sinew.quiet?
|
141
|
-
return true
|
142
|
-
end
|
143
|
-
urls << url
|
144
|
-
end
|
145
|
-
false
|
146
|
-
end
|
147
|
-
protected :dup_url?
|
148
|
-
end
|
149
|
-
end
|
data/lib/sinew/request.rb
DELETED
@@ -1,151 +0,0 @@
|
|
1
|
-
require 'digest/md5'
|
2
|
-
require 'httparty'
|
3
|
-
require 'htmlentities'
|
4
|
-
|
5
|
-
#
|
6
|
-
# Process a single HTTP request. Mostly a wrapper around HTTParty.
|
7
|
-
#
|
8
|
-
|
9
|
-
module Sinew
|
10
|
-
class Error < StandardError; end
|
11
|
-
|
12
|
-
class Request
|
13
|
-
HTML_ENTITIES = HTMLEntities.new
|
14
|
-
VALID_METHODS = %w[get post patch put delete head options].freeze
|
15
|
-
|
16
|
-
attr_reader :sinew, :method, :uri, :options, :cache_key
|
17
|
-
|
18
|
-
# Options are largely compatible with HTTParty, except for :method.
|
19
|
-
def initialize(sinew, method, url, options = {})
|
20
|
-
@sinew = sinew
|
21
|
-
@method = method
|
22
|
-
@options = options.dup
|
23
|
-
@uri = parse_url(url)
|
24
|
-
@cache_key = calculate_cache_key
|
25
|
-
end
|
26
|
-
|
27
|
-
# run the request, return the result
|
28
|
-
def perform
|
29
|
-
validate!
|
30
|
-
|
31
|
-
# merge options
|
32
|
-
options = self.options.merge(sinew.runtime_options.httparty_options)
|
33
|
-
|
34
|
-
# merge headers
|
35
|
-
headers = sinew.runtime_options.headers
|
36
|
-
headers = headers.merge(options[:headers]) if options[:headers]
|
37
|
-
options[:headers] = headers
|
38
|
-
|
39
|
-
party_response = HTTParty.send(method, uri, options)
|
40
|
-
Response.from_network(self, party_response)
|
41
|
-
end
|
42
|
-
|
43
|
-
# We accept sloppy urls and attempt to clean them up
|
44
|
-
def parse_url(url)
|
45
|
-
s = url
|
46
|
-
|
47
|
-
# remove entities
|
48
|
-
s = HTML_ENTITIES.decode(s)
|
49
|
-
|
50
|
-
# fix a couple of common encoding bugs
|
51
|
-
s = s.gsub(' ', '%20')
|
52
|
-
s = s.gsub("'", '%27')
|
53
|
-
|
54
|
-
# append query manually (instead of letting HTTParty handle it) so we can
|
55
|
-
# include it in cache_key
|
56
|
-
query = options.delete(:query)
|
57
|
-
if query.present?
|
58
|
-
q = HTTParty::HashConversions.to_params(query)
|
59
|
-
separator = s.include?('?') ? '&' : '?'
|
60
|
-
s = "#{s}#{separator}#{q}"
|
61
|
-
end
|
62
|
-
|
63
|
-
URI.parse(s)
|
64
|
-
end
|
65
|
-
protected :parse_url
|
66
|
-
|
67
|
-
def calculate_cache_key
|
68
|
-
dir = pathify(uri.host)
|
69
|
-
|
70
|
-
body_key = if body.is_a?(Hash)
|
71
|
-
HTTParty::HashConversions.to_params(body)
|
72
|
-
else
|
73
|
-
body&.dup
|
74
|
-
end
|
75
|
-
|
76
|
-
# build key, as a hash for before_generate_cache_key
|
77
|
-
key = {
|
78
|
-
method: method.dup,
|
79
|
-
path: uri.path,
|
80
|
-
query: uri.query,
|
81
|
-
body: body_key,
|
82
|
-
}
|
83
|
-
key = sinew.runtime_options.before_generate_cache_key.call(key)
|
84
|
-
|
85
|
-
# strip method for gets
|
86
|
-
key.delete(:method) if key[:method] == 'get'
|
87
|
-
|
88
|
-
# pull out the values, join and pathify
|
89
|
-
path = key.values.select(&:present?).join(',')
|
90
|
-
path = pathify(path)
|
91
|
-
|
92
|
-
# shorten long paths
|
93
|
-
if path.length > 250
|
94
|
-
path = Digest::MD5.hexdigest(path)
|
95
|
-
end
|
96
|
-
|
97
|
-
"#{dir}/#{path}"
|
98
|
-
end
|
99
|
-
protected :calculate_cache_key
|
100
|
-
|
101
|
-
def validate!
|
102
|
-
raise "invalid method #{method}" if !VALID_METHODS.include?(method)
|
103
|
-
raise "invalid url #{uri}" if uri.scheme !~ /^http/
|
104
|
-
raise "can't get with a body" if method == 'get' && body
|
105
|
-
raise "Content-Type doesn't make sense without a body" if content_type && !body
|
106
|
-
end
|
107
|
-
protected :validate!
|
108
|
-
|
109
|
-
def body
|
110
|
-
options[:body]
|
111
|
-
end
|
112
|
-
protected :body
|
113
|
-
|
114
|
-
def headers
|
115
|
-
options[:headers]
|
116
|
-
end
|
117
|
-
protected :headers
|
118
|
-
|
119
|
-
def content_type
|
120
|
-
headers && headers['Content-Type']
|
121
|
-
end
|
122
|
-
protected :content_type
|
123
|
-
|
124
|
-
def form?
|
125
|
-
content_type == 'application/x-www-form-urlencoded'
|
126
|
-
end
|
127
|
-
protected :form?
|
128
|
-
|
129
|
-
def pathify(s)
|
130
|
-
# remove leading slash
|
131
|
-
s = s.gsub(/^\//, '')
|
132
|
-
# .. => comma
|
133
|
-
s = s.gsub('..', ',')
|
134
|
-
# query separators => comma
|
135
|
-
s = s.gsub(/[?\/&]/, ',')
|
136
|
-
# ,, => comma
|
137
|
-
s = s.gsub(',,', ',')
|
138
|
-
# encode invalid path chars
|
139
|
-
s = s.gsub(/[^A-Za-z0-9_.,=-]/) do |i|
|
140
|
-
hex = i.unpack('H2').first
|
141
|
-
"%#{hex}"
|
142
|
-
end
|
143
|
-
# handle empty case
|
144
|
-
s = '_root_' if s.blank?
|
145
|
-
# always downcase
|
146
|
-
s = s.downcase
|
147
|
-
s
|
148
|
-
end
|
149
|
-
protected :pathify
|
150
|
-
end
|
151
|
-
end
|