sinew 3.0.1 → 4.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/lib/sinew/dsl.rb DELETED
@@ -1,115 +0,0 @@
1
- require 'amazing_print'
2
- require 'cgi'
3
- require 'json'
4
-
5
- #
6
- # The DSL available to .sinew files.
7
- #
8
-
9
module Sinew
  #
  # The DSL available to .sinew files. The recipe named by
  # sinew.options[:recipe] is instance_eval'd against an instance of this
  # class, so every public method below can be called from the recipe.
  #
  class DSL
    # Raised by csv_emit to break out of the recipe once --limit is reached.
    class LimitError < StandardError; end

    # sinew   - the owning object; this class calls #options, #http and
    #           #output on it
    # uri     - uri of the most recent response
    # raw     - body of the most recent response
    # code    - status code of the most recent response
    # elapsed - seconds (Float) spent in the most recent #run
    attr_reader :sinew, :uri, :raw, :code, :elapsed

    def initialize(sinew)
      @sinew = sinew
    end

    # Evaluate the recipe file in the context of this object.
    #
    # LimitError is swallowed because it is only flow control for --limit.
    # Any other error propagates to the caller. Elapsed time is measured
    # with the monotonic clock and recorded in an ensure clause, so it is
    # available even when the recipe fails.
    #
    # Returns the elapsed time in seconds.
    def run
      started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
      begin
        recipe = sinew.options[:recipe]
        instance_eval(File.read(recipe, mode: 'rb'), recipe)
      rescue LimitError
        # ignore - this is flow control for --limit
      ensure
        @elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started
      end
      @elapsed
    end

    #
    # request
    #

    # GET url, with query passed along as the :query option.
    def get(url, query = {})
      http('get', url, query: query)
    end

    # POST form to url, labelled as application/x-www-form-urlencoded.
    def post(url, form = {})
      headers = { 'Content-Type' => 'application/x-www-form-urlencoded' }
      http('post', url, body: form, headers: headers)
    end

    # POST json (serialized with #to_json) to url as application/json.
    def post_json(url, json = {})
      headers = { 'Content-Type' => 'application/json' }
      http('post', url, body: json.to_json, headers: headers)
    end

    # Perform a request through sinew.http and remember the response so that
    # uri/raw/code (and the lazily parsed html/noko/xml/json) reflect it.
    # Always returns nil.
    def http(method, url, options = {})
      # the memoized parses belong to the previous response, so drop them
      %i[@html @noko @xml @json].each do |ivar|
        instance_variable_set(ivar, nil)
      end

      # fetch and make response available to callers
      response = sinew.http(method, url, options)
      @uri = response.uri
      @raw = response.body
      @code = response.code

      # don't confuse the user
      nil
    end

    #
    # response
    #

    # The raw body with whitespace squished and the single spaces around
    # tags removed. Memoized until the next request.
    def html
      @html ||= begin
        s = raw.dup
        # squish!
        s.squish!
        # kill whitespace around tags
        s.gsub!(/ ?<([^>]+)> ?/, '<\\1>')
        s
      end
    end

    # The cleaned html parsed with Nokogiri::HTML. Memoized.
    def noko
      @noko ||= Nokogiri::HTML(html)
    end

    # The cleaned html parsed with Nokogiri::XML. Memoized.
    def xml
      @xml ||= Nokogiri::XML(html)
    end

    # The raw body parsed as JSON, with symbolized keys. Memoized.
    def json
      @json ||= JSON.parse(raw, symbolize_names: true)
    end

    # The uri of the most recent response, as a String.
    def url
      uri.to_s
    end

    #
    # csv
    #

    # Declare the csv columns; args are handed to the output as one array.
    def csv_header(*args)
      sinew.output.header(args)
    end

    # Emit one row, then raise LimitError once the number of emitted rows
    # equals options[:limit].
    def csv_emit(row)
      sinew.output.emit(row)
      raise LimitError if sinew.output.count == sinew.options[:limit]
    end
  end
end
data/lib/sinew/output.rb DELETED
@@ -1,133 +0,0 @@
1
- require 'csv'
2
- require 'set'
3
- require 'sterile'
4
-
5
- #
6
- # CSV output.
7
- #
8
-
9
module Sinew
  #
  # CSV output. Rows are normalized to plain text and written to a csv file
  # named after the recipe.
  #
  class Output
    # sinew   - the owning object; this class calls #options, #banner,
    #           #warning, #vputs and #quiet? on it
    # columns - column names, set by #header
    # rows    - copies of every row emitted so far
    # urls    - Set of row[:url] values seen so far
    # csv     - the open CSV writer (nil until #header runs)
    attr_reader :sinew, :columns, :rows, :urls, :csv

    def initialize(sinew)
      @sinew = sinew
      @rows = []
      @urls = Set.new
    end

    # Path of the csv file: the recipe path with its extension swapped for
    # .csv, or with .csv appended when the recipe has no extension.
    #
    # Only the trailing extension is replaced. Substituting the extension
    # text anywhere in the path would also rewrite directory names that
    # happen to contain it (e.g. "/home/me/.sinew/site.sinew").
    def filename
      @filename ||= begin
        recipe = sinew.options[:recipe]
        ext = File.extname(recipe)
        "#{recipe.delete_suffix(ext)}.csv"
      end
    end

    # Open the csv file and write the header row. columns may be nested
    # arrays; they are flattened.
    def header(columns)
      sinew.banner("Writing to #{filename}...") if !sinew.quiet?

      columns = columns.flatten
      @columns = columns

      # open csv, write header row
      @csv = CSV.open(filename, 'wb')
      csv << columns
    end

    # Write one row (a Hash keyed by column). Rows whose :url has already
    # been seen are skipped.
    def emit(row)
      # implicit header if necessary
      header(row.keys) if !csv

      # don't allow duplicate urls
      return if dup_url?(row)

      rows << row.dup

      # map columns to row, and normalize along the way
      printable = {}
      values = columns.map do |column|
        value = normalize(row[column])
        printable[column] = value if value.present?
        value
      end

      # print
      sinew.vputs printable.ai

      csv << values
      # flush after every row
      csv.flush
    end

    # Number of rows emitted so far.
    def count
      rows.length
    end

    # Print a per-column fill report (how many rows had a value) to stderr.
    # Does nothing when no rows were emitted.
    def report
      return if count == 0

      sinew.banner("Got #{count} rows.")

      # calculate counts
      counts = Hash.new(0)
      rows.each do |row|
        row.each_pair { |k, v| counts[k] += 1 if v.present? }
      end
      # sort by counts
      cols = columns.sort_by { |i| [ -counts[i], i ] }

      # report
      len = cols.map { |i| i.to_s.length }.max
      fmt = " %-#{len + 1}s %7d / %-7d %6.1f%%\n"
      cols.each do |col|
        $stderr.printf(fmt, col, counts[col], count, counts[col] * 100.0 / count)
      end
    end

    protected

    # Convert a cell value to a clean, single-line string.
    def normalize(s)
      # noko/array/misc => string
      s = case s
      when Nokogiri::XML::Element, Nokogiri::XML::NodeSet
        s.inner_html
      when Array
        s.map(&:to_s).join('|')
      else
        s.to_s
      end

      # strip html tags. Note that we replace tags with spaces
      s = s.gsub(/<[^>]+>/, ' ')

      # Converts MS Word 'smart punctuation' to ASCII
      s = Sterile.plain_format(s)

      # &aacute; &amp; etc.
      s = Sterile.decode_entities(s)

      # "šţɽĩɳģ" => "string"
      s = Sterile.transliterate(s)

      # squish
      s.squish
    end

    # True (after warning, unless quiet) when row[:url] was already emitted;
    # otherwise remembers the url and returns false. Rows without a :url are
    # never considered duplicates.
    def dup_url?(row)
      url = row[:url]
      return false if !url

      if urls.include?(url)
        sinew.warning("duplicate url: #{url}") if !sinew.quiet?
        return true
      end

      urls << url
      false
    end
  end
end
data/lib/sinew/request.rb DELETED
@@ -1,86 +0,0 @@
1
- require 'sterile'
2
-
3
- #
4
- # Process a single HTTP request.
5
- #
6
-
7
module Sinew
  class Error < StandardError; end

  #
  # Process a single HTTP request.
  #
  class Request
    VALID_METHODS = %w[get post patch put delete head options].freeze
    METHODS_WITH_BODY = %w[patch post put].freeze

    attr_reader :method, :options, :uri

    # Supported options:
    # body: Body of http post
    # headers: Hash of HTTP headers (combined with runtime_options.headers)
    # query: Hash of query parameters to add to url
    #
    # The options hash is duplicated, so the caller's hash is not modified.
    # The :query option is consumed here and folded into #uri.
    def initialize(method, url, options = {})
      @method = method
      @options = options.dup
      @uri = parse_url(url)
    end

    # run the request, return the result
    #
    # Raises RuntimeError (see validate!) when the request is malformed.
    # Note that :body is removed from options as part of performing.
    def perform(connection)
      validate!

      body = options.delete(:body)
      fday_response = connection.send(method, uri, body) do |req|
        req.headers.update(options[:headers]) if options[:headers]
        req.options[:proxy] = options[:proxy]
      end

      Response.from_network(self, fday_response)
    end

    protected

    # We accept sloppy urls and attempt to clean them up
    def parse_url(url)
      s = url.to_s

      # remove entities
      s = Sterile.decode_entities(s)

      # fix a couple of common encoding bugs
      s = s.gsub(' ', '%20')
      s = s.gsub("'", '%27')

      # append query manually (instead of letting Faraday handle it) for consistent
      # Request#uri and Response#uri
      query = options.delete(:query)
      if query.present?
        q = Faraday::Utils.default_params_encoder.encode(query)
        separator = s.include?('?') ? '&' : '?'
        s = "#{s}#{separator}#{q}"
      end

      URI.parse(s)
    end

    # Raise RuntimeError unless the method, url scheme, body and
    # Content-Type are mutually consistent.
    def validate!
      raise "invalid method #{method}" if !VALID_METHODS.include?(method)
      # Match the whole scheme: only http and https are accepted. A bare
      # /^http/ prefix test would let schemes such as "httpx" through.
      raise "invalid url #{uri}" if !uri.scheme.to_s.match?(/\Ahttps?\z/)
      raise "can't #{method} with a body" if body && !METHODS_WITH_BODY.include?(method)
      raise "Content-Type doesn't make sense without a body" if content_type && !body
    end

    # The :body option, if any.
    def body
      options[:body]
    end

    # The :headers option, if any.
    def headers
      options[:headers]
    end

    # The Content-Type header from the :headers option, if any.
    def content_type
      headers && headers['Content-Type']
    end
  end
end
@@ -1,28 +0,0 @@
1
- #
2
- # Runtime options that sinew files can modify.
3
- #
4
-
5
module Sinew
  #
  # Runtime options that sinew files can modify.
  #
  class RuntimeOptions
    attr_accessor :retries, :rate_limit, :headers, :httpdisk_options, :insecure

    # Populate every option with its default. When the SINEW_TEST
    # environment variable is set (for testing), rate_limit is 0 instead
    # of 1.
    def initialize
      self.retries = 3
      self.rate_limit = ENV['SINEW_TEST'] ? 0 : 1
      self.headers = { 'User-Agent' => "sinew/#{VERSION}" }
      self.httpdisk_options = {}
      self.insecure = false
    end
  end
end