sinew 3.0.1 → 4.0.0

Sign up to get free protection for your applications and to get access to all the features.
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sinew
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.1
4
+ version: 4.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adam Doppelt
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2021-06-04 00:00:00.000000000 Z
12
+ date: 2021-07-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: amazing_print
@@ -54,47 +54,61 @@ dependencies:
54
54
  - !ruby/object:Gem::Version
55
55
  version: '0'
56
56
  - !ruby/object:Gem::Dependency
57
- name: httpdisk
57
+ name: faraday-rate_limiter
58
58
  requirement: !ruby/object:Gem::Requirement
59
59
  requirements:
60
60
  - - "~>"
61
61
  - !ruby/object:Gem::Version
62
- version: '0'
62
+ version: '0.0'
63
63
  type: :runtime
64
64
  prerelease: false
65
65
  version_requirements: !ruby/object:Gem::Requirement
66
66
  requirements:
67
67
  - - "~>"
68
68
  - !ruby/object:Gem::Version
69
- version: '0'
69
+ version: '0.0'
70
70
  - !ruby/object:Gem::Dependency
71
- name: nokogiri
71
+ name: hashie
72
72
  requirement: !ruby/object:Gem::Requirement
73
73
  requirements:
74
74
  - - "~>"
75
75
  - !ruby/object:Gem::Version
76
- version: '1.11'
76
+ version: '4.1'
77
77
  type: :runtime
78
78
  prerelease: false
79
79
  version_requirements: !ruby/object:Gem::Requirement
80
80
  requirements:
81
81
  - - "~>"
82
82
  - !ruby/object:Gem::Version
83
- version: '1.11'
83
+ version: '4.1'
84
84
  - !ruby/object:Gem::Dependency
85
- name: scripto
85
+ name: httpdisk
86
86
  requirement: !ruby/object:Gem::Requirement
87
87
  requirements:
88
88
  - - "~>"
89
89
  - !ruby/object:Gem::Version
90
- version: '0'
90
+ version: '0.5'
91
91
  type: :runtime
92
92
  prerelease: false
93
93
  version_requirements: !ruby/object:Gem::Requirement
94
94
  requirements:
95
95
  - - "~>"
96
96
  - !ruby/object:Gem::Version
97
- version: '0'
97
+ version: '0.5'
98
+ - !ruby/object:Gem::Dependency
99
+ name: nokogiri
100
+ requirement: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - "~>"
103
+ - !ruby/object:Gem::Version
104
+ version: '1.11'
105
+ type: :runtime
106
+ prerelease: false
107
+ version_requirements: !ruby/object:Gem::Requirement
108
+ requirements:
109
+ - - "~>"
110
+ - !ruby/object:Gem::Version
111
+ version: '1.11'
98
112
  - !ruby/object:Gem::Dependency
99
113
  name: slop
100
114
  requirement: !ruby/object:Gem::Requirement
@@ -134,26 +148,22 @@ files:
134
148
  - ".github/workflows/test.yml"
135
149
  - ".gitignore"
136
150
  - ".rubocop.yml"
137
- - ".vscode/extensions.json"
138
- - ".vscode/settings.json"
139
151
  - Gemfile
152
+ - Gemfile.lock
140
153
  - LICENSE
141
154
  - README.md
142
155
  - Rakefile
143
156
  - bin/sinew
144
157
  - lib/sinew.rb
145
- - lib/sinew/connection.rb
146
- - lib/sinew/connection/log_formatter.rb
147
- - lib/sinew/connection/rate_limit.rb
148
- - lib/sinew/core_ext.rb
149
- - lib/sinew/dsl.rb
158
+ - lib/sinew/args.rb
159
+ - lib/sinew/base.rb
160
+ - lib/sinew/csv.rb
150
161
  - lib/sinew/main.rb
162
+ - lib/sinew/middleware/log_formatter.rb
151
163
  - lib/sinew/nokogiri_ext.rb
152
- - lib/sinew/output.rb
153
- - lib/sinew/request.rb
154
164
  - lib/sinew/response.rb
155
- - lib/sinew/runtime_options.rb
156
165
  - lib/sinew/version.rb
166
+ - sample.rb
157
167
  - sample.sinew
158
168
  - sinew.gemspec
159
169
  homepage: http://github.com/gurgeous/sinew
@@ -1,3 +0,0 @@
1
- {
2
- "recommendations": ["rebornix.Ruby"]
3
- }
@@ -1,5 +0,0 @@
1
- {
2
- "files.associations": {
3
- "*.sinew": "ruby"
4
- }
5
- }
@@ -1,52 +0,0 @@
1
- require 'faraday'
2
- require 'faraday-encoding'
3
- require 'faraday/logging/formatter'
4
- require 'httpdisk'
5
- require 'sinew/connection/log_formatter'
6
- require 'sinew/connection/rate_limit'
7
-
8
- module Sinew
9
- module Connection
10
- def self.create(options:, runtime_options:)
11
- connection_options = {}
12
- connection_options[:ssl] = { verify: false } if runtime_options.insecure
13
-
14
- Faraday.new(nil, connection_options) do
15
- _1.use RateLimit, rate_limit: runtime_options.rate_limit
16
-
17
- # auto-encode form bodies
18
- _1.request :url_encoded
19
-
20
- # Before httpdisk so each redirect segment is cached
21
- # Keep track of redirect status for logger
22
- _1.response :follow_redirects, callback: ->(_old_env, new_env) { new_env[:redirect] = true }
23
-
24
- # set Ruby string encoding based on Content-Type (should be above httpdisk)
25
- _1.response :encoding
26
-
27
- # disk caching
28
- httpdisk_options = {
29
- dir: options[:cache],
30
- force: options[:force],
31
- force_errors: options[:force_errors],
32
- }.merge(runtime_options.httpdisk_options)
33
-
34
- _1.use :httpdisk, httpdisk_options
35
-
36
- # After httpdisk so that only non-cached requests are logged.
37
- # Before retry so that we don't log each retry attempt.
38
- _1.response :logger, nil, formatter: LogFormatter if !options[:quiet]
39
-
40
- # After httpdisk so transient failures are not cached
41
- retry_options = {
42
- interval: runtime_options.rate_limit,
43
- max: runtime_options.retries,
44
- methods: %w[delete get head options patch post put trace],
45
- retry_statuses: (500..600).to_a,
46
- retry_if: ->(_env, _err) { true },
47
- }
48
- _1.request :retry, retry_options
49
- end
50
- end
51
- end
52
- end
@@ -1,29 +0,0 @@
1
- module Sinew
2
- module Connection
3
- class RateLimit < Faraday::Middleware
4
- attr_reader :rate_limit
5
-
6
- def initialize(app, options = {})
7
- super(app)
8
-
9
- @last_request_tm = @current_request_tm = nil
10
- @rate_limit = options.fetch(:rate_limit, 1)
11
- end
12
-
13
- def on_request(_env)
14
- if @last_request_tm
15
- sleep = (@last_request_tm + rate_limit) - Time.now
16
- sleep(sleep) if sleep > 0
17
- end
18
-
19
- @current_request_tm = Time.now
20
- end
21
-
22
- def on_complete(env)
23
- # Only rate limit on uncached requests
24
- @last_request_tm = @current_request_tm unless env[:httpdisk]
25
- @current_request_tm = nil
26
- end
27
- end
28
- end
29
- end
@@ -1,59 +0,0 @@
1
- #
2
- # A few core extensions brought over from ActiveSupport. These are handy for
3
- # parsing.
4
- #
5
-
6
- class String
7
- def squish
8
- dup.squish!
9
- end
10
-
11
- def squish!
12
- strip!
13
- gsub!(/\s+/, ' ')
14
- self
15
- end
16
-
17
- def first(limit = 1)
18
- if limit == 0
19
- ''
20
- elsif limit >= size
21
- dup
22
- else
23
- self[0..limit - 1]
24
- end
25
- end
26
-
27
- def last(limit = 1)
28
- if limit == 0
29
- ''
30
- elsif limit >= size
31
- dup
32
- else
33
- self[-limit..]
34
- end
35
- end
36
-
37
- alias starts_with? start_with?
38
- alias ends_with? end_with?
39
- end
40
-
41
- #
42
- # blank?/present?
43
- #
44
-
45
- class Object
46
- def blank?
47
- respond_to?(:empty?) ? !!empty? : !self
48
- end
49
-
50
- def present?
51
- !blank?
52
- end
53
- end
54
-
55
- class String
56
- def blank?
57
- !!(self =~ /\A\s*\z/)
58
- end
59
- end
data/lib/sinew/dsl.rb DELETED
@@ -1,115 +0,0 @@
1
- require 'amazing_print'
2
- require 'cgi'
3
- require 'json'
4
-
5
- #
6
- # The DSL available to .sinew files.
7
- #
8
-
9
- module Sinew
10
- class DSL
11
- # this is used to break out of --limit
12
- class LimitError < StandardError; end
13
-
14
- attr_reader :sinew, :uri, :raw, :code, :elapsed
15
-
16
- def initialize(sinew)
17
- @sinew = sinew
18
- end
19
-
20
- def run
21
- tm = Time.now
22
- begin
23
- recipe = sinew.options[:recipe]
24
- instance_eval(File.read(recipe, mode: 'rb'), recipe)
25
- rescue LimitError
26
- # ignore - this is flow control for --limit
27
- end
28
- @elapsed = Time.now - tm
29
- end
30
-
31
- #
32
- # request
33
- #
34
-
35
- def get(url, query = {})
36
- http('get', url, query: query)
37
- end
38
-
39
- def post(url, form = {})
40
- body = form
41
- headers = {
42
- 'Content-Type' => 'application/x-www-form-urlencoded',
43
- }
44
- http('post', url, body: body, headers: headers)
45
- end
46
-
47
- def post_json(url, json = {})
48
- body = json.to_json
49
- headers = {
50
- 'Content-Type' => 'application/json',
51
- }
52
- http('post', url, body: body, headers: headers)
53
- end
54
-
55
- def http(method, url, options = {})
56
- # these need to be cleared before each request
57
- %i[@html @noko @xml @json].each do |i|
58
- instance_variable_set(i, nil)
59
- end
60
-
61
- # fetch and make response available to callers
62
- response = sinew.http(method, url, options)
63
- @uri, @raw, @code = response.uri, response.body, response.code
64
-
65
- # don't confuse the user
66
- nil
67
- end
68
-
69
- #
70
- # response
71
- #
72
-
73
- def html
74
- @html ||= begin
75
- s = raw.dup
76
- # squish!
77
- s.squish!
78
- # kill whitespace around tags
79
- s.gsub!(/ ?<([^>]+)> ?/, '<\\1>')
80
- s
81
- end
82
- end
83
-
84
- def noko
85
- @noko ||= Nokogiri::HTML(html)
86
- end
87
-
88
- def xml
89
- @xml ||= Nokogiri::XML(html)
90
- end
91
-
92
- def json
93
- @json ||= JSON.parse(raw, symbolize_names: true)
94
- end
95
-
96
- def url
97
- uri.to_s
98
- end
99
-
100
- #
101
- # csv
102
- #
103
-
104
- def csv_header(*args)
105
- sinew.output.header(args)
106
- end
107
-
108
- def csv_emit(row)
109
- sinew.output.emit(row)
110
- if sinew.output.count == sinew.options[:limit]
111
- raise LimitError.new
112
- end
113
- end
114
- end
115
- end
data/lib/sinew/output.rb DELETED
@@ -1,133 +0,0 @@
1
- require 'csv'
2
- require 'set'
3
- require 'sterile'
4
-
5
- #
6
- # CSV output.
7
- #
8
-
9
- module Sinew
10
- class Output
11
- attr_reader :sinew, :columns, :rows, :urls, :csv
12
-
13
- def initialize(sinew)
14
- @sinew = sinew
15
- @rows = []
16
- @urls = Set.new
17
- end
18
-
19
- def filename
20
- @filename ||= begin
21
- recipe = sinew.options[:recipe]
22
- ext = File.extname(recipe)
23
- if ext.empty?
24
- "#{recipe}.csv"
25
- else
26
- recipe.gsub(ext, '.csv')
27
- end
28
- end
29
- end
30
-
31
- def header(columns)
32
- sinew.banner("Writing to #{filename}...") if !sinew.quiet?
33
-
34
- columns = columns.flatten
35
- @columns = columns
36
-
37
- # open csv, write header row
38
- @csv = CSV.open(filename, 'wb')
39
- csv << columns
40
- end
41
-
42
- def emit(row)
43
- # implicit header if necessary
44
- header(row.keys) if !csv
45
-
46
- # don't allow duplicate urls
47
- return if dup_url?(row)
48
-
49
- rows << row.dup
50
-
51
- # map columns to row, and normalize along the way
52
- print = {}
53
- row = columns.map do |i|
54
- value = normalize(row[i])
55
- print[i] = value if value.present?
56
- value
57
- end
58
-
59
- # print
60
- sinew.vputs print.ai
61
-
62
- csv << row
63
- csv.flush
64
- end
65
-
66
- def count
67
- rows.length
68
- end
69
-
70
- def report
71
- return if count == 0
72
-
73
- sinew.banner("Got #{count} rows.")
74
-
75
- # calculate counts
76
- counts = Hash.new(0)
77
- rows.each do |row|
78
- row.each_pair { |k, v| counts[k] += 1 if v.present? }
79
- end
80
- # sort by counts
81
- cols = columns.sort_by { |i| [ -counts[i], i ] }
82
-
83
- # report
84
- len = cols.map { |i| i.to_s.length }.max
85
- fmt = " %-#{len + 1}s %7d / %-7d %6.1f%%\n"
86
- cols.each do |col|
87
- $stderr.printf(fmt, col, counts[col], count, counts[col] * 100.0 / count)
88
- end
89
- end
90
-
91
- def normalize(s)
92
- # noko/array/misc => string
93
- s = case s
94
- when Nokogiri::XML::Element, Nokogiri::XML::NodeSet
95
- s.inner_html
96
- when Array
97
- s.map(&:to_s).join('|')
98
- else
99
- s.to_s
100
- end
101
-
102
- # strip html tags. Note that we replace tags with spaces
103
- s = s.gsub(/<[^>]+>/, ' ')
104
-
105
- # Converts MS Word 'smart punctuation' to ASCII
106
- s = Sterile.plain_format(s)
107
-
108
- # &aacute; &amp; etc.
109
- s = Sterile.decode_entities(s)
110
-
111
- # "šţɽĩɳģ" => "string"
112
- s = Sterile.transliterate(s)
113
-
114
- # squish
115
- s = s.squish
116
-
117
- s
118
- end
119
- protected :normalize
120
-
121
- def dup_url?(row)
122
- if url = row[:url]
123
- if urls.include?(url)
124
- sinew.warning("duplicate url: #{url}") if !sinew.quiet?
125
- return true
126
- end
127
- urls << url
128
- end
129
- false
130
- end
131
- protected :dup_url?
132
- end
133
- end