sinew 3.0.1 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sinew
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.1
4
+ version: 4.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adam Doppelt
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2021-06-04 00:00:00.000000000 Z
12
+ date: 2021-07-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: amazing_print
@@ -54,47 +54,61 @@ dependencies:
54
54
  - !ruby/object:Gem::Version
55
55
  version: '0'
56
56
  - !ruby/object:Gem::Dependency
57
- name: httpdisk
57
+ name: faraday-rate_limiter
58
58
  requirement: !ruby/object:Gem::Requirement
59
59
  requirements:
60
60
  - - "~>"
61
61
  - !ruby/object:Gem::Version
62
- version: '0'
62
+ version: '0.0'
63
63
  type: :runtime
64
64
  prerelease: false
65
65
  version_requirements: !ruby/object:Gem::Requirement
66
66
  requirements:
67
67
  - - "~>"
68
68
  - !ruby/object:Gem::Version
69
- version: '0'
69
+ version: '0.0'
70
70
  - !ruby/object:Gem::Dependency
71
- name: nokogiri
71
+ name: hashie
72
72
  requirement: !ruby/object:Gem::Requirement
73
73
  requirements:
74
74
  - - "~>"
75
75
  - !ruby/object:Gem::Version
76
- version: '1.11'
76
+ version: '4.1'
77
77
  type: :runtime
78
78
  prerelease: false
79
79
  version_requirements: !ruby/object:Gem::Requirement
80
80
  requirements:
81
81
  - - "~>"
82
82
  - !ruby/object:Gem::Version
83
- version: '1.11'
83
+ version: '4.1'
84
84
  - !ruby/object:Gem::Dependency
85
- name: scripto
85
+ name: httpdisk
86
86
  requirement: !ruby/object:Gem::Requirement
87
87
  requirements:
88
88
  - - "~>"
89
89
  - !ruby/object:Gem::Version
90
- version: '0'
90
+ version: '0.5'
91
91
  type: :runtime
92
92
  prerelease: false
93
93
  version_requirements: !ruby/object:Gem::Requirement
94
94
  requirements:
95
95
  - - "~>"
96
96
  - !ruby/object:Gem::Version
97
- version: '0'
97
+ version: '0.5'
98
+ - !ruby/object:Gem::Dependency
99
+ name: nokogiri
100
+ requirement: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - "~>"
103
+ - !ruby/object:Gem::Version
104
+ version: '1.11'
105
+ type: :runtime
106
+ prerelease: false
107
+ version_requirements: !ruby/object:Gem::Requirement
108
+ requirements:
109
+ - - "~>"
110
+ - !ruby/object:Gem::Version
111
+ version: '1.11'
98
112
  - !ruby/object:Gem::Dependency
99
113
  name: slop
100
114
  requirement: !ruby/object:Gem::Requirement
@@ -134,26 +148,22 @@ files:
134
148
  - ".github/workflows/test.yml"
135
149
  - ".gitignore"
136
150
  - ".rubocop.yml"
137
- - ".vscode/extensions.json"
138
- - ".vscode/settings.json"
139
151
  - Gemfile
152
+ - Gemfile.lock
140
153
  - LICENSE
141
154
  - README.md
142
155
  - Rakefile
143
156
  - bin/sinew
144
157
  - lib/sinew.rb
145
- - lib/sinew/connection.rb
146
- - lib/sinew/connection/log_formatter.rb
147
- - lib/sinew/connection/rate_limit.rb
148
- - lib/sinew/core_ext.rb
149
- - lib/sinew/dsl.rb
158
+ - lib/sinew/args.rb
159
+ - lib/sinew/base.rb
160
+ - lib/sinew/csv.rb
150
161
  - lib/sinew/main.rb
162
+ - lib/sinew/middleware/log_formatter.rb
151
163
  - lib/sinew/nokogiri_ext.rb
152
- - lib/sinew/output.rb
153
- - lib/sinew/request.rb
154
164
  - lib/sinew/response.rb
155
- - lib/sinew/runtime_options.rb
156
165
  - lib/sinew/version.rb
166
+ - sample.rb
157
167
  - sample.sinew
158
168
  - sinew.gemspec
159
169
  homepage: http://github.com/gurgeous/sinew
@@ -1,3 +0,0 @@
1
- {
2
- "recommendations": ["rebornix.Ruby"]
3
- }
@@ -1,5 +0,0 @@
1
- {
2
- "files.associations": {
3
- "*.sinew": "ruby"
4
- }
5
- }
@@ -1,52 +0,0 @@
1
- require 'faraday'
2
- require 'faraday-encoding'
3
- require 'faraday/logging/formatter'
4
- require 'httpdisk'
5
- require 'sinew/connection/log_formatter'
6
- require 'sinew/connection/rate_limit'
7
-
8
- module Sinew
9
- module Connection
10
- def self.create(options:, runtime_options:)
11
- connection_options = {}
12
- connection_options[:ssl] = { verify: false } if runtime_options.insecure
13
-
14
- Faraday.new(nil, connection_options) do
15
- _1.use RateLimit, rate_limit: runtime_options.rate_limit
16
-
17
- # auto-encode form bodies
18
- _1.request :url_encoded
19
-
20
- # Before httpdisk so each redirect segment is cached
21
- # Keep track of redirect status for logger
22
- _1.response :follow_redirects, callback: ->(_old_env, new_env) { new_env[:redirect] = true }
23
-
24
- # set Ruby string encoding based on Content-Type (should be above httpdisk)
25
- _1.response :encoding
26
-
27
- # disk caching
28
- httpdisk_options = {
29
- dir: options[:cache],
30
- force: options[:force],
31
- force_errors: options[:force_errors],
32
- }.merge(runtime_options.httpdisk_options)
33
-
34
- _1.use :httpdisk, httpdisk_options
35
-
36
- # After httpdisk so that only non-cached requests are logged.
37
- # Before retry so that we don't log each retry attempt.
38
- _1.response :logger, nil, formatter: LogFormatter if !options[:quiet]
39
-
40
- # After httpdisk so transient failures are not cached
41
- retry_options = {
42
- interval: runtime_options.rate_limit,
43
- max: runtime_options.retries,
44
- methods: %w[delete get head options patch post put trace],
45
- retry_statuses: (500..600).to_a,
46
- retry_if: ->(_env, _err) { true },
47
- }
48
- _1.request :retry, retry_options
49
- end
50
- end
51
- end
52
- end
@@ -1,29 +0,0 @@
1
- module Sinew
2
- module Connection
3
- class RateLimit < Faraday::Middleware
4
- attr_reader :rate_limit
5
-
6
- def initialize(app, options = {})
7
- super(app)
8
-
9
- @last_request_tm = @current_request_tm = nil
10
- @rate_limit = options.fetch(:rate_limit, 1)
11
- end
12
-
13
- def on_request(_env)
14
- if @last_request_tm
15
- sleep = (@last_request_tm + rate_limit) - Time.now
16
- sleep(sleep) if sleep > 0
17
- end
18
-
19
- @current_request_tm = Time.now
20
- end
21
-
22
- def on_complete(env)
23
- # Only rate limit on uncached requests
24
- @last_request_tm = @current_request_tm unless env[:httpdisk]
25
- @current_request_tm = nil
26
- end
27
- end
28
- end
29
- end
@@ -1,59 +0,0 @@
1
- #
2
- # A few core extensions brought over from ActiveSupport. These are handy for
3
- # parsing.
4
- #
5
-
6
- class String
7
- def squish
8
- dup.squish!
9
- end
10
-
11
- def squish!
12
- strip!
13
- gsub!(/\s+/, ' ')
14
- self
15
- end
16
-
17
- def first(limit = 1)
18
- if limit == 0
19
- ''
20
- elsif limit >= size
21
- dup
22
- else
23
- self[0..limit - 1]
24
- end
25
- end
26
-
27
- def last(limit = 1)
28
- if limit == 0
29
- ''
30
- elsif limit >= size
31
- dup
32
- else
33
- self[-limit..]
34
- end
35
- end
36
-
37
- alias starts_with? start_with?
38
- alias ends_with? end_with?
39
- end
40
-
41
- #
42
- # blank?/present?
43
- #
44
-
45
- class Object
46
- def blank?
47
- respond_to?(:empty?) ? !!empty? : !self
48
- end
49
-
50
- def present?
51
- !blank?
52
- end
53
- end
54
-
55
- class String
56
- def blank?
57
- !!(self =~ /\A\s*\z/)
58
- end
59
- end
data/lib/sinew/dsl.rb DELETED
@@ -1,115 +0,0 @@
1
- require 'amazing_print'
2
- require 'cgi'
3
- require 'json'
4
-
5
- #
6
- # The DSL available to .sinew files.
7
- #
8
-
9
- module Sinew
10
- class DSL
11
- # this is used to break out of --limit
12
- class LimitError < StandardError; end
13
-
14
- attr_reader :sinew, :uri, :raw, :code, :elapsed
15
-
16
- def initialize(sinew)
17
- @sinew = sinew
18
- end
19
-
20
- def run
21
- tm = Time.now
22
- begin
23
- recipe = sinew.options[:recipe]
24
- instance_eval(File.read(recipe, mode: 'rb'), recipe)
25
- rescue LimitError
26
- # ignore - this is flow control for --limit
27
- end
28
- @elapsed = Time.now - tm
29
- end
30
-
31
- #
32
- # request
33
- #
34
-
35
- def get(url, query = {})
36
- http('get', url, query: query)
37
- end
38
-
39
- def post(url, form = {})
40
- body = form
41
- headers = {
42
- 'Content-Type' => 'application/x-www-form-urlencoded',
43
- }
44
- http('post', url, body: body, headers: headers)
45
- end
46
-
47
- def post_json(url, json = {})
48
- body = json.to_json
49
- headers = {
50
- 'Content-Type' => 'application/json',
51
- }
52
- http('post', url, body: body, headers: headers)
53
- end
54
-
55
- def http(method, url, options = {})
56
- # these need to be cleared before each request
57
- %i[@html @noko @xml @json].each do |i|
58
- instance_variable_set(i, nil)
59
- end
60
-
61
- # fetch and make response available to callers
62
- response = sinew.http(method, url, options)
63
- @uri, @raw, @code = response.uri, response.body, response.code
64
-
65
- # don't confuse the user
66
- nil
67
- end
68
-
69
- #
70
- # response
71
- #
72
-
73
- def html
74
- @html ||= begin
75
- s = raw.dup
76
- # squish!
77
- s.squish!
78
- # kill whitespace around tags
79
- s.gsub!(/ ?<([^>]+)> ?/, '<\\1>')
80
- s
81
- end
82
- end
83
-
84
- def noko
85
- @noko ||= Nokogiri::HTML(html)
86
- end
87
-
88
- def xml
89
- @xml ||= Nokogiri::XML(html)
90
- end
91
-
92
- def json
93
- @json ||= JSON.parse(raw, symbolize_names: true)
94
- end
95
-
96
- def url
97
- uri.to_s
98
- end
99
-
100
- #
101
- # csv
102
- #
103
-
104
- def csv_header(*args)
105
- sinew.output.header(args)
106
- end
107
-
108
- def csv_emit(row)
109
- sinew.output.emit(row)
110
- if sinew.output.count == sinew.options[:limit]
111
- raise LimitError.new
112
- end
113
- end
114
- end
115
- end
data/lib/sinew/output.rb DELETED
@@ -1,133 +0,0 @@
1
- require 'csv'
2
- require 'set'
3
- require 'sterile'
4
-
5
- #
6
- # CSV output.
7
- #
8
-
9
- module Sinew
10
- class Output
11
- attr_reader :sinew, :columns, :rows, :urls, :csv
12
-
13
- def initialize(sinew)
14
- @sinew = sinew
15
- @rows = []
16
- @urls = Set.new
17
- end
18
-
19
- def filename
20
- @filename ||= begin
21
- recipe = sinew.options[:recipe]
22
- ext = File.extname(recipe)
23
- if ext.empty?
24
- "#{recipe}.csv"
25
- else
26
- recipe.gsub(ext, '.csv')
27
- end
28
- end
29
- end
30
-
31
- def header(columns)
32
- sinew.banner("Writing to #{filename}...") if !sinew.quiet?
33
-
34
- columns = columns.flatten
35
- @columns = columns
36
-
37
- # open csv, write header row
38
- @csv = CSV.open(filename, 'wb')
39
- csv << columns
40
- end
41
-
42
- def emit(row)
43
- # implicit header if necessary
44
- header(row.keys) if !csv
45
-
46
- # don't allow duplicate urls
47
- return if dup_url?(row)
48
-
49
- rows << row.dup
50
-
51
- # map columns to row, and normalize along the way
52
- print = {}
53
- row = columns.map do |i|
54
- value = normalize(row[i])
55
- print[i] = value if value.present?
56
- value
57
- end
58
-
59
- # print
60
- sinew.vputs print.ai
61
-
62
- csv << row
63
- csv.flush
64
- end
65
-
66
- def count
67
- rows.length
68
- end
69
-
70
- def report
71
- return if count == 0
72
-
73
- sinew.banner("Got #{count} rows.")
74
-
75
- # calculate counts
76
- counts = Hash.new(0)
77
- rows.each do |row|
78
- row.each_pair { |k, v| counts[k] += 1 if v.present? }
79
- end
80
- # sort by counts
81
- cols = columns.sort_by { |i| [ -counts[i], i ] }
82
-
83
- # report
84
- len = cols.map { |i| i.to_s.length }.max
85
- fmt = " %-#{len + 1}s %7d / %-7d %6.1f%%\n"
86
- cols.each do |col|
87
- $stderr.printf(fmt, col, counts[col], count, counts[col] * 100.0 / count)
88
- end
89
- end
90
-
91
- def normalize(s)
92
- # noko/array/misc => string
93
- s = case s
94
- when Nokogiri::XML::Element, Nokogiri::XML::NodeSet
95
- s.inner_html
96
- when Array
97
- s.map(&:to_s).join('|')
98
- else
99
- s.to_s
100
- end
101
-
102
- # strip html tags. Note that we replace tags with spaces
103
- s = s.gsub(/<[^>]+>/, ' ')
104
-
105
- # Converts MS Word 'smart punctuation' to ASCII
106
- s = Sterile.plain_format(s)
107
-
108
- # &aacute; &amp; etc.
109
- s = Sterile.decode_entities(s)
110
-
111
- # "šţɽĩɳģ" => "string"
112
- s = Sterile.transliterate(s)
113
-
114
- # squish
115
- s = s.squish
116
-
117
- s
118
- end
119
- protected :normalize
120
-
121
- def dup_url?(row)
122
- if url = row[:url]
123
- if urls.include?(url)
124
- sinew.warning("duplicate url: #{url}") if !sinew.quiet?
125
- return true
126
- end
127
- urls << url
128
- end
129
- false
130
- end
131
- protected :dup_url?
132
- end
133
- end