sinew 2.0.3 → 4.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +26 -0
- data/.gitignore +3 -5
- data/.rubocop.yml +31 -46
- data/Gemfile +9 -0
- data/Gemfile.lock +124 -0
- data/README.md +146 -81
- data/Rakefile +36 -20
- data/bin/sinew +13 -39
- data/lib/sinew.rb +23 -10
- data/lib/sinew/args.rb +53 -0
- data/lib/sinew/base.rb +251 -0
- data/lib/sinew/csv.rb +89 -0
- data/lib/sinew/main.rb +45 -98
- data/lib/sinew/middleware/log_formatter.rb +23 -0
- data/lib/sinew/nokogiri_ext.rb +12 -21
- data/lib/sinew/response.rb +39 -99
- data/lib/sinew/version.rb +1 -1
- data/sample.rb +13 -0
- data/sample.sinew +4 -4
- data/sinew.gemspec +26 -25
- metadata +46 -108
- data/.travis.yml +0 -4
- data/.vscode/extensions.json +0 -3
- data/.vscode/settings.json +0 -15
- data/lib/sinew/cache.rb +0 -79
- data/lib/sinew/core_ext.rb +0 -59
- data/lib/sinew/dsl.rb +0 -114
- data/lib/sinew/output.rb +0 -149
- data/lib/sinew/request.rb +0 -151
- data/lib/sinew/runtime_options.rb +0 -28
- data/test/legacy/eu.httpbin.org/head/redirect,3 +0 -51
- data/test/legacy/eu.httpbin.org/head/status,500 +0 -1
- data/test/legacy/eu.httpbin.org/redirect,3 +0 -11
- data/test/legacy/eu.httpbin.org/status,500 +0 -1
- data/test/legacy/legacy.sinew +0 -2
- data/test/recipes/array_header.sinew +0 -6
- data/test/recipes/basic.sinew +0 -8
- data/test/recipes/dups.sinew +0 -7
- data/test/recipes/implicit_header.sinew +0 -5
- data/test/recipes/limit.sinew +0 -11
- data/test/recipes/noko.sinew +0 -9
- data/test/recipes/uri.sinew +0 -11
- data/test/recipes/xml.sinew +0 -8
- data/test/test.html +0 -45
- data/test/test_cache.rb +0 -69
- data/test/test_helper.rb +0 -123
- data/test/test_legacy.rb +0 -23
- data/test/test_main.rb +0 -34
- data/test/test_nokogiri_ext.rb +0 -18
- data/test/test_output.rb +0 -56
- data/test/test_recipes.rb +0 -60
- data/test/test_requests.rb +0 -135
- data/test/test_utf8.rb +0 -39
data/lib/sinew/core_ext.rb
DELETED
@@ -1,59 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# A few core extensions brought over from ActiveSupport. These are handy for
|
3
|
-
# parsing.
|
4
|
-
#
|
5
|
-
|
6
|
-
class String
  # Returns a copy of the string with leading/trailing whitespace removed and
  # every internal run of whitespace collapsed to a single space.
  def squish
    dup.squish!
  end

  # In-place version of #squish. Always returns self; the results of strip!
  # and gsub! are ignored on purpose because they are nil when nothing changed.
  def squish!
    strip!
    gsub!(/\s+/, ' ')
    self
  end

  # Returns the first +limit+ characters as a new string (the whole string
  # when +limit+ is at least the string's size, '' when +limit+ is 0).
  #
  # Raises ArgumentError for a negative +limit+. The previous range-based
  # slice silently returned the wrong substring in that case.
  def first(limit = 1)
    raise ArgumentError, 'negative limit' if limit < 0

    self[0, limit]
  end

  # Returns the last +limit+ characters as a new string (the whole string
  # when +limit+ is at least the string's size, '' when +limit+ is 0).
  #
  # Raises ArgumentError for a negative +limit+.
  def last(limit = 1)
    raise ArgumentError, 'negative limit' if limit < 0

    # clamp the start index so an oversized limit yields the whole string
    self[[size - limit, 0].max, limit]
  end

  alias starts_with? start_with?
  alias ends_with? end_with?
end
|
40
|
-
|
41
|
-
#
|
42
|
-
# blank?/present?
|
43
|
-
#
|
44
|
-
|
45
|
-
class Object
  # True when the object counts as "empty": objects that respond to empty?
  # are blank when empty; everything else is blank only when it is falsy
  # (nil or false).
  def blank?
    return !self unless respond_to?(:empty?)

    empty? ? true : false
  end

  # Inverse of blank?.
  def present?
    !blank?
  end
end
|
54
|
-
|
55
|
-
class String
  # A string is blank when it is empty or made up solely of whitespace
  # (as matched by \s). Always returns true or false.
  def blank?
    /\A\s*\z/.match?(self)
  end
end
|
data/lib/sinew/dsl.rb
DELETED
@@ -1,114 +0,0 @@
|
|
1
|
-
require 'awesome_print'
|
2
|
-
require 'cgi'
|
3
|
-
|
4
|
-
#
|
5
|
-
# The DSL available to .sinew files.
|
6
|
-
#
|
7
|
-
|
8
|
-
module Sinew
  # The DSL available to .sinew files. The recipe is evaluated with
  # instance_eval, so every public method below can be called directly from
  # recipe code.
  class DSL
    # this is used to break out of --limit
    class LimitError < StandardError; end

    attr_reader :sinew, :raw, :uri, :elapsed

    # sinew is the object this DSL delegates to; it is used here through
    # sinew.options, sinew.http and sinew.output.
    def initialize(sinew)
      @sinew = sinew
    end

    # Evaluate the recipe named by sinew.options[:recipe] and record how long
    # that took in #elapsed (seconds, Float). Returns the elapsed time.
    #
    # The duration is taken from the monotonic clock so that wall-clock
    # adjustments cannot skew it, and it is recorded in an ensure so that
    # #elapsed is available even when the recipe raises.
    def run
      started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
      begin
        recipe = sinew.options[:recipe]
        instance_eval(File.read(recipe, mode: 'rb'), recipe)
      rescue LimitError
        # ignore - this is flow control for --limit
      ensure
        @elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started
      end
      @elapsed
    end

    #
    # request
    #

    # GET url, with query passed through as the :query option.
    def get(url, query = {})
      http('get', url, query: query)
    end

    # POST form to url with a form-urlencoded Content-Type header.
    def post(url, form = {})
      headers = {
        'Content-Type' => 'application/x-www-form-urlencoded',
      }
      http('post', url, body: form, headers: headers)
    end

    # POST json (serialized with to_json) to url with a JSON Content-Type
    # header.
    def post_json(url, json = {})
      headers = {
        'Content-Type' => 'application/json',
      }
      http('post', url, body: json.to_json, headers: headers)
    end

    # Perform a request through sinew.http and expose the result via #uri and
    # #raw (and the lazily built #html, #noko, #xml, #json). Always returns
    # nil.
    def http(method, url, options = {})
      # these need to be cleared before each request
      @html = @noko = @xml = @json = nil

      # fetch and make response available to callers
      response = sinew.http(method, url, options)
      @uri = response.uri
      @raw = response.body

      # don't confuse the user
      nil
    end

    #
    # response
    #

    # The response body, squished, with whitespace around tags removed.
    # Memoized until the next request.
    def html
      @html ||= begin
        s = raw.dup
        # squish!
        s.squish!
        # kill whitespace around tags
        s.gsub!(/ ?<([^>]+)> ?/, '<\\1>')
        s
      end
    end

    # #html parsed with Nokogiri::HTML. Memoized until the next request.
    def noko
      @noko ||= Nokogiri::HTML(html)
    end

    # #html parsed with Nokogiri::XML. Memoized until the next request.
    def xml
      @xml ||= Nokogiri::XML(html)
    end

    # #raw parsed as JSON with symbolized names. Memoized until the next
    # request.
    def json
      @json ||= JSON.parse(raw, symbolize_names: true)
    end

    # The uri of the most recent response, as a string.
    def url
      uri.to_s
    end

    #
    # csv
    #

    # Declare the csv columns. Arguments are handed to sinew.output.header as
    # a single array.
    def csv_header(*args)
      sinew.output.header(args)
    end

    # Emit one row, then raise LimitError (caught by #run) once the number of
    # rows reported by sinew.output equals sinew.options[:limit].
    def csv_emit(row)
      sinew.output.emit(row)
      raise LimitError if sinew.output.count == sinew.options[:limit]
    end
  end
end
|
data/lib/sinew/output.rb
DELETED
@@ -1,149 +0,0 @@
|
|
1
|
-
require 'csv'
|
2
|
-
require 'set'
|
3
|
-
require 'stringex'
|
4
|
-
|
5
|
-
#
|
6
|
-
# Stringex customizations
|
7
|
-
#
|
8
|
-
|
9
|
-
# turn '&amp;' into '&', not 'and'
|
10
|
-
Stringex::Localization::DefaultConversions::HTML_ENTITIES[:amp] = '&'
|
11
|
-
|
12
|
-
#
|
13
|
-
# CSV output.
|
14
|
-
#
|
15
|
-
|
16
|
-
module Sinew
  # CSV output. Rows are normalized to plain ASCII text, written to a csv
  # file named after the recipe, and kept in memory for the final report.
  class Output
    attr_reader :sinew, :columns, :rows, :urls, :csv

    def initialize(sinew)
      @sinew = sinew
      @rows = []
      @urls = Set.new
    end

    # Path of the csv file: the recipe path with its extension replaced by
    # .csv (or with .csv appended when the recipe has no extension).
    #
    # Only the trailing extension is replaced. A plain gsub would also
    # rewrite earlier occurrences of the same text, e.g. in a directory name.
    def filename
      @filename ||= begin
        recipe = sinew.options[:recipe]
        ext = File.extname(recipe)
        if ext.empty?
          "#{recipe}.csv"
        else
          recipe.sub(/#{Regexp.escape(ext)}\z/, '.csv')
        end
      end
    end

    # Set the columns (nested arrays are flattened), open the csv file and
    # write the header row.
    def header(columns)
      sinew.banner("Writing to #{filename}...") if !sinew.quiet?

      columns = columns.flatten
      @columns = columns

      # open csv, write header row
      @csv = CSV.open(filename, 'wb')
      csv << columns
    end

    # Write one row (a hash keyed by column). Rows whose :url was already
    # emitted are skipped.
    def emit(row)
      # implicit header if necessary
      header(row.keys) if !csv

      # don't allow duplicate urls
      return if dup_url?(row)

      rows << row.dup

      # map columns to row, and normalize along the way
      shown = {}
      values = columns.map do |column|
        value = normalize(row[column])
        shown[column] = value if value.present?
        value
      end

      # print
      sinew.vputs shown.ai

      csv << values
      csv.flush
    end

    # Number of rows emitted so far.
    def count
      rows.length
    end

    # Print, to $stderr, how many rows have a value for each column. Columns
    # are ordered by descending count, then by name.
    def report
      return if count == 0

      sinew.banner("Got #{count} rows.")

      # calculate counts
      counts = Hash.new(0)
      rows.each do |row|
        row.each_pair { |k, v| counts[k] += 1 if v.present? }
      end
      # sort by counts
      cols = columns.sort_by { |i| [ -counts[i], i ] }

      # report
      len = cols.map { |i| i.to_s.length }.max
      fmt = " %-#{len + 1}s %7d / %-7d %6.1f%%\n"
      cols.each do |col|
        $stderr.printf(fmt, col, counts[col], count, counts[col] * 100.0 / count)
      end
    end

    # Turn any cell value into a clean, single-line ASCII string.
    def normalize(s)
      # noko/array/misc => string
      s = case s
      when Nokogiri::XML::Element, Nokogiri::XML::NodeSet
        s.inner_html
      when Array
        s.map(&:to_s).join('|')
      else
        s.to_s
      end

      # strip html tags. Note that we replace tags with spaces
      s = s.gsub(/<[^>]+>/, ' ')

      #
      # Below uses stringex
      #
      # github.com/rsl/stringex/blob/master/lib/stringex/string_extensions.rb
      # github.com/rsl/stringex/blob/master/lib/stringex/localization/conversion_expressions.rb
      #

      # Converts MS Word 'smart punctuation' to ASCII
      s = s.convert_smart_punctuation

      # "&aacute;".convert_accented_html_entities # => "a"
      s = s.convert_accented_html_entities

      # &amp;, &frac, etc.
      s = s.convert_miscellaneous_html_entities

      # convert unicode => regular characters
      s = s.to_ascii

      # squish
      s.squish
    end
    protected :normalize

    # True when row[:url] has been seen before (a warning is printed unless
    # quiet). Otherwise remembers the url, if there is one, and returns false.
    def dup_url?(row)
      url = row[:url]
      return false if !url

      if urls.include?(url)
        sinew.warning("duplicate url: #{url}") if !sinew.quiet?
        return true
      end

      urls << url
      false
    end
    protected :dup_url?
  end
end
|
data/lib/sinew/request.rb
DELETED
@@ -1,151 +0,0 @@
|
|
1
|
-
require 'digest/md5'
|
2
|
-
require 'httparty'
|
3
|
-
require 'htmlentities'
|
4
|
-
|
5
|
-
#
|
6
|
-
# Process a single HTTP request. Mostly a wrapper around HTTParty.
|
7
|
-
#
|
8
|
-
|
9
|
-
module Sinew
  class Error < StandardError; end

  # Process a single HTTP request. Mostly a wrapper around HTTParty, plus the
  # calculation of the cache key used to store the response.
  class Request
    HTML_ENTITIES = HTMLEntities.new
    VALID_METHODS = %w[get post patch put delete head options].freeze

    attr_reader :sinew, :method, :uri, :options, :cache_key

    # Options are largely compatible with HTTParty, except for :method.
    # A :query option is folded into the uri (see parse_url) and removed from
    # the stored options. The caller's hash is not modified.
    def initialize(sinew, method, url, options = {})
      @sinew = sinew
      @method = method
      @options = options.dup
      @uri = parse_url(url)
      @cache_key = calculate_cache_key
    end

    # run the request, return the result
    def perform
      validate!

      # merge options
      merged = options.merge(sinew.runtime_options.httparty_options)

      # merge headers - request headers win over the runtime defaults
      all_headers = sinew.runtime_options.headers
      all_headers = all_headers.merge(merged[:headers]) if merged[:headers]
      merged[:headers] = all_headers

      party_response = HTTParty.send(method, uri, merged)
      Response.from_network(self, party_response)
    end

    # We accept sloppy urls and attempt to clean them up
    def parse_url(url)
      s = url

      # remove entities
      s = HTML_ENTITIES.decode(s)

      # fix a couple of common encoding bugs
      s = s.gsub(' ', '%20')
      s = s.gsub("'", '%27')

      # append query manually (instead of letting HTTParty handle it) so we can
      # include it in cache_key
      query = options.delete(:query)
      if query.present?
        q = HTTParty::HashConversions.to_params(query)
        separator = s.include?('?') ? '&' : '?'
        s = "#{s}#{separator}#{q}"
      end

      URI.parse(s)
    end
    protected :parse_url

    # Build the "<host dir>/<path>" cache key from the method, path, query
    # and body of the request.
    def calculate_cache_key
      dir = pathify(uri.host)

      body_key = if body.is_a?(Hash)
        HTTParty::HashConversions.to_params(body)
      else
        body&.dup
      end

      # build key, as a hash for before_generate_cache_key
      key = {
        method: method.dup,
        path: uri.path,
        query: uri.query,
        body: body_key,
      }
      key = sinew.runtime_options.before_generate_cache_key.call(key)

      # strip method for gets
      key.delete(:method) if key[:method] == 'get'

      # pull out the values, join and pathify
      path = key.values.select(&:present?).join(',')
      path = pathify(path)

      # shorten long paths
      if path.length > 250
        path = Digest::MD5.hexdigest(path)
      end

      "#{dir}/#{path}"
    end
    protected :calculate_cache_key

    # Raise (RuntimeError) unless the request is well formed.
    def validate!
      raise "invalid method #{method}" if !VALID_METHODS.include?(method)
      # Match the whole scheme: the previous /^http/ also accepted any scheme
      # that merely starts with "http" (e.g. "httpx") and anchored per line.
      raise "invalid url #{uri}" if uri.scheme !~ /\Ahttps?\z/
      raise "can't get with a body" if method == 'get' && body
      raise "Content-Type doesn't make sense without a body" if content_type && !body
    end
    protected :validate!

    def body
      options[:body]
    end
    protected :body

    def headers
      options[:headers]
    end
    protected :headers

    def content_type
      headers && headers['Content-Type']
    end
    protected :content_type

    def form?
      content_type == 'application/x-www-form-urlencoded'
    end
    protected :form?

    # Turn a string into something safe to use as a file name.
    #
    # NOTE(review): the leading-slash pattern uses ^, which matches at the
    # start of every line rather than only the start of the string, and the
    # hex escape keeps only the first byte of a multibyte character. Both are
    # left untouched because changing them would change existing cache keys.
    def pathify(s)
      # remove leading slash
      s = s.gsub(/^\//, '')
      # .. => comma
      s = s.gsub('..', ',')
      # query separators => comma
      s = s.gsub(/[?\/&]/, ',')
      # ,, => comma
      s = s.gsub(',,', ',')
      # encode invalid path chars
      s = s.gsub(/[^A-Za-z0-9_.,=-]/) do |i|
        hex = i.unpack('H2').first
        "%#{hex}"
      end
      # handle empty case
      s = '_root_' if s.blank?
      # always downcase
      s.downcase
    end
    protected :pathify
  end
end
|