sinew 4.0.0 → 4.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +4 -8
- data/.rubocop.yml +12 -29
- data/Gemfile +7 -7
- data/Gemfile.lock +78 -70
- data/LICENSE +1 -1
- data/README.md +6 -2
- data/Rakefile +3 -52
- data/bin/sinew +5 -5
- data/justfile +59 -0
- data/lib/sinew/args.rb +19 -19
- data/lib/sinew/base.rb +15 -14
- data/lib/sinew/csv.rb +8 -8
- data/lib/sinew/main.rb +4 -4
- data/lib/sinew/middleware/log_formatter.rb +1 -1
- data/lib/sinew/nokogiri_ext.rb +5 -5
- data/lib/sinew/response.rb +6 -6
- data/lib/sinew/version.rb +1 -1
- data/lib/sinew.rb +8 -8
- data/sample.rb +5 -5
- data/sinew.gemspec +21 -20
- metadata +33 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 30b6d7da43f53885bd23a2d283ec2dc34f0f38640d4bac3c5f3275deed4f8e84
|
4
|
+
data.tar.gz: dfb86670352efb63ddad965418b619ab74edf8738a7ae55ed4c8ea521c2f625c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 230c4c81b27dd5087bb44ac3b89b29bed65222d81327c4dd8580c5762e50cb1d54d5521d5d3349f7d40a228244e9fc436d502fa22e879909ce2b983b9d1cee0f
|
7
|
+
data.tar.gz: ce76f649971d961bd22de2f8b71f1226838b9e8385dd269fe2e695e93164e850b16fff3a6c4c42b5fb961beff9a6675deb6fa5e66060053274a464d878cd0d35
|
data/.github/workflows/test.yml
CHANGED
@@ -2,11 +2,7 @@ name: test
|
|
2
2
|
|
3
3
|
on:
|
4
4
|
push:
|
5
|
-
paths-ignore:
|
6
|
-
- '**.md'
|
7
5
|
pull_request:
|
8
|
-
paths-ignore:
|
9
|
-
- '**.md'
|
10
6
|
workflow_dispatch:
|
11
7
|
|
12
8
|
jobs:
|
@@ -15,12 +11,12 @@ jobs:
|
|
15
11
|
max-parallel: 3
|
16
12
|
matrix:
|
17
13
|
os: [ubuntu, macos]
|
18
|
-
ruby-version: [3.
|
14
|
+
ruby-version: [head, 3.2, 3.1]
|
19
15
|
runs-on: ${{ matrix.os }}-latest
|
20
16
|
steps:
|
21
|
-
- uses: actions/checkout@
|
17
|
+
- uses: actions/checkout@v3
|
18
|
+
- uses: taiki-e/install-action@just
|
22
19
|
- uses: ruby/setup-ruby@v1
|
23
20
|
with:
|
24
21
|
ruby-version: ${{ matrix.ruby-version }}
|
25
|
-
- run:
|
26
|
-
- run: bundle exec rake test
|
22
|
+
- run: just ci
|
data/.rubocop.yml
CHANGED
@@ -1,34 +1,17 @@
|
|
1
|
+
require:
|
2
|
+
- standard
|
3
|
+
|
4
|
+
inherit_gem:
|
5
|
+
standard: config/base.yml
|
6
|
+
|
1
7
|
AllCops:
|
2
8
|
NewCops: enable
|
3
9
|
SuggestExtensions: false
|
4
|
-
TargetRubyVersion:
|
10
|
+
TargetRubyVersion: 3.1
|
5
11
|
|
6
|
-
#
|
7
|
-
|
12
|
+
#
|
13
|
+
# fight with standardrb!
|
14
|
+
#
|
8
15
|
|
9
|
-
|
10
|
-
|
11
|
-
Layout/EndAlignment: { EnforcedStyleAlignWith: variable }
|
12
|
-
Lint/AssignmentInCondition: { Enabled: false }
|
13
|
-
Lint/NonLocalExitFromIterator: { Enabled: false }
|
14
|
-
Metrics: { Enabled: false }
|
15
|
-
Naming/HeredocDelimiterNaming: { Enabled: false }
|
16
|
-
Naming/MethodParameterName: { Enabled: false }
|
17
|
-
Naming/VariableNumber: { Enabled: false }
|
18
|
-
Style/AsciiComments: { Enabled: false }
|
19
|
-
Style/ClassVars: { Enabled: false }
|
20
|
-
Style/CommentAnnotation: { Enabled: false }
|
21
|
-
Style/Documentation: { Enabled: false }
|
22
|
-
Style/DoubleNegation: { Enabled: false }
|
23
|
-
Style/EmptyCaseCondition: { Enabled: false }
|
24
|
-
Style/FormatStringToken: { Enabled: false }
|
25
|
-
Style/FrozenStringLiteralComment: { Enabled: false }
|
26
|
-
Style/GuardClause: { Enabled: false }
|
27
|
-
Style/IfUnlessModifier: { Enabled: false }
|
28
|
-
Style/NegatedIf: { Enabled: false }
|
29
|
-
Style/NumericPredicate: { Enabled: false }
|
30
|
-
Style/ParallelAssignment: { Enabled: false }
|
31
|
-
Style/StderrPuts: { Enabled: false }
|
32
|
-
Style/StringConcatenation: { Enabled: false }
|
33
|
-
Style/TrailingCommaInArrayLiteral: { EnforcedStyleForMultiline: consistent_comma }
|
34
|
-
Style/TrailingCommaInHashLiteral: { EnforcedStyleForMultiline: consistent_comma }
|
16
|
+
Style/RedundantReturn: { Enabled: false }
|
17
|
+
Style/HashSyntax: { EnforcedShorthandSyntax: always }
|
data/Gemfile
CHANGED
@@ -1,11 +1,11 @@
|
|
1
|
-
source
|
1
|
+
source "http://rubygems.org"
|
2
2
|
gemspec
|
3
3
|
|
4
4
|
group :development, :test do
|
5
|
-
gem
|
6
|
-
gem
|
7
|
-
gem
|
8
|
-
gem
|
9
|
-
gem
|
10
|
-
gem
|
5
|
+
gem "minitest"
|
6
|
+
gem "mocha"
|
7
|
+
gem "pry"
|
8
|
+
gem "rake"
|
9
|
+
gem "standard", require: false
|
10
|
+
gem "webmock"
|
11
11
|
end
|
data/Gemfile.lock
CHANGED
@@ -1,124 +1,132 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
sinew (4.0.
|
5
|
-
amazing_print (~> 1.
|
6
|
-
faraday (~>
|
7
|
-
faraday-encoding (~> 0)
|
4
|
+
sinew (4.0.1)
|
5
|
+
amazing_print (~> 1.5)
|
6
|
+
faraday (~> 2.7)
|
7
|
+
faraday-encoding (~> 0.0)
|
8
8
|
faraday-rate_limiter (~> 0.0)
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
9
|
+
faraday-retry (~> 2.0)
|
10
|
+
hashie (~> 5.0)
|
11
|
+
httpdisk (~> 1.0)
|
12
|
+
nokogiri (~> 1.15)
|
13
|
+
slop (~> 4.10)
|
13
14
|
sterile (~> 1.0)
|
14
15
|
|
15
16
|
GEM
|
16
17
|
remote: http://rubygems.org/
|
17
18
|
specs:
|
18
|
-
addressable (2.8.
|
19
|
-
public_suffix (>= 2.0.2, <
|
20
|
-
amazing_print (1.
|
19
|
+
addressable (2.8.5)
|
20
|
+
public_suffix (>= 2.0.2, < 6.0)
|
21
|
+
amazing_print (1.5.0)
|
21
22
|
ast (2.4.2)
|
22
23
|
coderay (1.1.3)
|
23
|
-
content-type (0.0.
|
24
|
-
parslet (~>
|
24
|
+
content-type (0.0.2)
|
25
|
+
parslet (~> 2.0)
|
25
26
|
crack (0.4.5)
|
26
27
|
rexml
|
27
28
|
domain_name (0.5.20190701)
|
28
29
|
unf (>= 0.0.5, < 1.0.0)
|
29
|
-
faraday (
|
30
|
-
faraday-
|
31
|
-
faraday-em_synchrony (~> 1.0)
|
32
|
-
faraday-excon (~> 1.1)
|
33
|
-
faraday-httpclient (~> 1.0.1)
|
34
|
-
faraday-net_http (~> 1.0)
|
35
|
-
faraday-net_http_persistent (~> 1.1)
|
36
|
-
faraday-patron (~> 1.0)
|
37
|
-
multipart-post (>= 1.2, < 3)
|
30
|
+
faraday (2.7.10)
|
31
|
+
faraday-net_http (>= 2.0, < 3.1)
|
38
32
|
ruby2_keywords (>= 0.0.4)
|
39
33
|
faraday-cookie_jar (0.0.7)
|
40
34
|
faraday (>= 0.8.0)
|
41
35
|
http-cookie (~> 1.0.0)
|
42
|
-
faraday-em_http (1.0.0)
|
43
|
-
faraday-em_synchrony (1.0.0)
|
44
36
|
faraday-encoding (0.0.5)
|
45
37
|
faraday
|
46
|
-
faraday-
|
47
|
-
|
48
|
-
faraday-net_http (
|
49
|
-
faraday-net_http_persistent (1.1.0)
|
50
|
-
faraday-patron (1.0.0)
|
38
|
+
faraday-follow_redirects (0.3.0)
|
39
|
+
faraday (>= 1, < 3)
|
40
|
+
faraday-net_http (3.0.2)
|
51
41
|
faraday-rate_limiter (0.0.4)
|
52
42
|
faraday
|
53
|
-
|
54
|
-
faraday (~>
|
43
|
+
faraday-retry (2.0.0)
|
44
|
+
faraday (~> 2.0)
|
55
45
|
hashdiff (1.0.1)
|
56
|
-
hashie (
|
57
|
-
http-cookie (1.0.
|
46
|
+
hashie (5.0.0)
|
47
|
+
http-cookie (1.0.5)
|
58
48
|
domain_name (~> 0.5)
|
59
|
-
httpdisk (0.
|
49
|
+
httpdisk (1.0.0)
|
60
50
|
content-type (~> 0.0)
|
61
|
-
faraday (~>
|
51
|
+
faraday (~> 2.7)
|
62
52
|
faraday-cookie_jar (~> 0.0)
|
63
|
-
|
64
|
-
slop (~> 4.
|
53
|
+
faraday-follow_redirects (~> 0.0)
|
54
|
+
slop (~> 4.10)
|
55
|
+
json (2.6.3)
|
56
|
+
language_server-protocol (3.17.0.3)
|
57
|
+
lint_roller (1.1.0)
|
65
58
|
method_source (1.0.0)
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
nokogiri (1.11.7)
|
71
|
-
mini_portile2 (~> 2.5.0)
|
59
|
+
minitest (5.19.0)
|
60
|
+
mocha (2.1.0)
|
61
|
+
ruby2_keywords (>= 0.0.5)
|
62
|
+
nokogiri (1.15.4-arm64-darwin)
|
72
63
|
racc (~> 1.4)
|
73
|
-
parallel (1.
|
74
|
-
parser (3.
|
64
|
+
parallel (1.23.0)
|
65
|
+
parser (3.2.2.3)
|
75
66
|
ast (~> 2.4.1)
|
76
|
-
|
77
|
-
|
67
|
+
racc
|
68
|
+
parslet (2.0.0)
|
69
|
+
pry (0.14.2)
|
78
70
|
coderay (~> 1.1)
|
79
71
|
method_source (~> 1.0)
|
80
|
-
public_suffix (
|
81
|
-
racc (1.
|
82
|
-
rainbow (3.
|
72
|
+
public_suffix (5.0.3)
|
73
|
+
racc (1.7.1)
|
74
|
+
rainbow (3.1.1)
|
83
75
|
rake (13.0.6)
|
84
|
-
regexp_parser (2.
|
85
|
-
rexml (3.2.
|
86
|
-
rubocop (1.
|
76
|
+
regexp_parser (2.8.1)
|
77
|
+
rexml (3.2.6)
|
78
|
+
rubocop (1.52.1)
|
79
|
+
json (~> 2.3)
|
87
80
|
parallel (~> 1.10)
|
88
|
-
parser (>= 3.
|
81
|
+
parser (>= 3.2.2.3)
|
89
82
|
rainbow (>= 2.2.2, < 4.0)
|
90
83
|
regexp_parser (>= 1.8, < 3.0)
|
91
|
-
rexml
|
92
|
-
rubocop-ast (>= 1.
|
84
|
+
rexml (>= 3.2.5, < 4.0)
|
85
|
+
rubocop-ast (>= 1.28.0, < 2.0)
|
93
86
|
ruby-progressbar (~> 1.7)
|
94
|
-
unicode-display_width (>=
|
95
|
-
rubocop-ast (1.
|
96
|
-
parser (>= 3.
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
87
|
+
unicode-display_width (>= 2.4.0, < 3.0)
|
88
|
+
rubocop-ast (1.29.0)
|
89
|
+
parser (>= 3.2.1.0)
|
90
|
+
rubocop-performance (1.18.0)
|
91
|
+
rubocop (>= 1.7.0, < 2.0)
|
92
|
+
rubocop-ast (>= 0.4.0)
|
93
|
+
ruby-progressbar (1.13.0)
|
94
|
+
ruby2_keywords (0.0.5)
|
95
|
+
slop (4.10.1)
|
96
|
+
standard (1.30.1)
|
97
|
+
language_server-protocol (~> 3.17.0.2)
|
98
|
+
lint_roller (~> 1.0)
|
99
|
+
rubocop (~> 1.52.0)
|
100
|
+
standard-custom (~> 1.0.0)
|
101
|
+
standard-performance (~> 1.1.0)
|
102
|
+
standard-custom (1.0.2)
|
103
|
+
lint_roller (~> 1.0)
|
104
|
+
rubocop (~> 1.50)
|
105
|
+
standard-performance (1.1.2)
|
106
|
+
lint_roller (~> 1.1)
|
107
|
+
rubocop-performance (~> 1.18.0)
|
108
|
+
sterile (1.0.25)
|
101
109
|
nokogiri (>= 1.11.7)
|
102
110
|
unf (0.1.4)
|
103
111
|
unf_ext
|
104
|
-
unf_ext (0.0.
|
105
|
-
unicode-display_width (2.
|
106
|
-
webmock (3.
|
107
|
-
addressable (>= 2.
|
112
|
+
unf_ext (0.0.8.2)
|
113
|
+
unicode-display_width (2.4.2)
|
114
|
+
webmock (3.18.1)
|
115
|
+
addressable (>= 2.8.0)
|
108
116
|
crack (>= 0.3.2)
|
109
117
|
hashdiff (>= 0.4.0, < 2.0.0)
|
110
118
|
|
111
119
|
PLATFORMS
|
112
|
-
|
120
|
+
arm64-darwin-20
|
113
121
|
|
114
122
|
DEPENDENCIES
|
115
123
|
minitest
|
116
124
|
mocha
|
117
125
|
pry
|
118
126
|
rake
|
119
|
-
rubocop (~> 1.18)
|
120
127
|
sinew!
|
128
|
+
standard
|
121
129
|
webmock
|
122
130
|
|
123
131
|
BUNDLED WITH
|
124
|
-
2.
|
132
|
+
2.4.19
|
data/LICENSE
CHANGED
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
[![Build Status](https://github.com/gurgeous/sinew/workflows/test/badge.svg?branch=
|
1
|
+
[![Build Status](https://github.com/gurgeous/sinew/workflows/test/badge.svg?branch=main)](https://github.com/gurgeous/sinew/action)
|
2
2
|
|
3
3
|
## Welcome to Sinew
|
4
4
|
|
@@ -221,7 +221,11 @@ end.map(&:text)
|
|
221
221
|
|
222
222
|
## Changelog
|
223
223
|
|
224
|
-
#### 4.0.
|
224
|
+
#### 4.0.1 (Aug 2023)
|
225
|
+
|
226
|
+
- Updated dependencies, added justfile
|
227
|
+
|
228
|
+
#### 4.0.0 (Jul 2021)
|
225
229
|
|
226
230
|
- Rewritten to use simpler DSL
|
227
231
|
- Upgraded to httpdisk 0.5 to take advantage of the new encoding support
|
data/Rakefile
CHANGED
@@ -1,54 +1,5 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require "bundler/setup"
|
2
|
+
require "minitest/test_task"
|
3
3
|
|
4
|
-
# load the spec, we use it below
|
5
|
-
spec = Gem::Specification.load('sinew.gemspec')
|
6
|
-
|
7
|
-
#
|
8
|
-
# testing
|
9
|
-
# don't forget about TESTOPTS="--verbose" rake
|
10
|
-
# also: rake install && rm -rf ~/.sinew/www.amazon.com && /usr/local/bin/sinew sample.sinew
|
11
|
-
#
|
12
|
-
|
13
|
-
# test (default)
|
14
|
-
Rake::TestTask.new
|
15
4
|
task default: :test
|
16
|
-
|
17
|
-
# Watch rb files, run tests whenever something changes
|
18
|
-
task :watch do
|
19
|
-
sh "find . -name '*.rb' -o -name '*.sinew' | entr -c rake"
|
20
|
-
end
|
21
|
-
|
22
|
-
#
|
23
|
-
# pry
|
24
|
-
#
|
25
|
-
|
26
|
-
task :pry do
|
27
|
-
sh 'pry -I lib -r sinew.rb'
|
28
|
-
end
|
29
|
-
|
30
|
-
#
|
31
|
-
# rubocop
|
32
|
-
#
|
33
|
-
|
34
|
-
task :rubocop do
|
35
|
-
sh 'bundle exec rubocop -A .'
|
36
|
-
end
|
37
|
-
|
38
|
-
#
|
39
|
-
# gem
|
40
|
-
#
|
41
|
-
|
42
|
-
task :build do
|
43
|
-
sh 'gem build --quiet sinew.gemspec'
|
44
|
-
end
|
45
|
-
|
46
|
-
task install: :build do
|
47
|
-
sh "gem install --quiet sinew-#{spec.version}.gem"
|
48
|
-
end
|
49
|
-
|
50
|
-
task release: %i[rubocop test build] do
|
51
|
-
sh "git tag -a #{spec.version} -m 'Tagging #{spec.version}'"
|
52
|
-
sh 'git push --tags'
|
53
|
-
sh "gem push sinew-#{spec.version}.gem"
|
54
|
-
end
|
5
|
+
Minitest::TestTask.create
|
data/bin/sinew
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
-
$LOAD_PATH.unshift(File.join(__dir__,
|
3
|
+
$LOAD_PATH.unshift(File.join(__dir__, "../lib"))
|
4
4
|
|
5
5
|
BIN = File.basename($PROGRAM_NAME)
|
6
6
|
|
@@ -8,12 +8,12 @@ BIN = File.basename($PROGRAM_NAME)
|
|
8
8
|
# Load the bare minimum and parse args with slop. For speed.
|
9
9
|
#
|
10
10
|
|
11
|
-
require
|
11
|
+
require "sinew/args"
|
12
12
|
begin
|
13
13
|
slop = Sinew::Args.slop(ARGV)
|
14
14
|
rescue Slop::Error => e
|
15
|
-
|
16
|
-
|
15
|
+
warn "#{BIN}: #{e}" if e.message != ""
|
16
|
+
warn("#{BIN}: try '#{BIN} --help' for more information")
|
17
17
|
exit 1
|
18
18
|
end
|
19
19
|
|
@@ -21,5 +21,5 @@ end
|
|
21
21
|
# now load everything and run
|
22
22
|
#
|
23
23
|
|
24
|
-
require
|
24
|
+
require "sinew"
|
25
25
|
Sinew::Main.new(slop).run
|
data/justfile
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
|
2
|
+
# read gem version
|
3
|
+
gemver := `cat lib/sinew/version.rb | grep -Eo "[0-9]+\.[0-9]+\.[0-9]+"`
|
4
|
+
|
5
|
+
#
|
6
|
+
# dev
|
7
|
+
#
|
8
|
+
|
9
|
+
default: test
|
10
|
+
|
11
|
+
check: lint test
|
12
|
+
|
13
|
+
fmt:
|
14
|
+
bundle exec rubocop -a
|
15
|
+
|
16
|
+
lint:
|
17
|
+
@just banner lint...
|
18
|
+
bundle exec rubocop
|
19
|
+
|
20
|
+
pry:
|
21
|
+
bundle exec pry -I lib -r sinew.rb
|
22
|
+
|
23
|
+
test:
|
24
|
+
@just banner test...
|
25
|
+
bundle exec rake test
|
26
|
+
|
27
|
+
watch:
|
28
|
+
@watchexec --watch lib --watch test --clear bundle exec rake test
|
29
|
+
|
30
|
+
#
|
31
|
+
# ci
|
32
|
+
#
|
33
|
+
|
34
|
+
ci:
|
35
|
+
bundle install
|
36
|
+
just check
|
37
|
+
|
38
|
+
#
|
39
|
+
# gem tasks
|
40
|
+
#
|
41
|
+
|
42
|
+
gem-push: check-git-status
|
43
|
+
@just banner gem build...
|
44
|
+
gem build sinew.gemspec
|
45
|
+
@just banner tag...
|
46
|
+
git tag -a "v{{gemver}}" -m "Tagging {{gemver}}"
|
47
|
+
git push --tags
|
48
|
+
@just banner gem push...
|
49
|
+
gem push "sinew-{{gemver}}.gem"
|
50
|
+
|
51
|
+
#
|
52
|
+
# util
|
53
|
+
#
|
54
|
+
|
55
|
+
banner *ARGS:
|
56
|
+
@printf '\e[42;37;1m[%s] %-72s \e[m\n' "$(date +%H:%M:%S)" "{{ARGS}}"
|
57
|
+
|
58
|
+
check-git-status:
|
59
|
+
@if [ ! -z "$(git status --porcelain)" ]; then echo "git status is dirty, bailing."; exit 1; fi
|
data/lib/sinew/args.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# manually load dependencies here since this is loaded standalone by bin
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require
|
2
|
+
require "httpdisk/slop_duration"
|
3
|
+
require "sinew/version"
|
4
|
+
require "slop"
|
5
5
|
|
6
6
|
#
|
7
7
|
# This is used to parse command line arguments with Slop. We don't set any
|
@@ -13,26 +13,26 @@ module Sinew
|
|
13
13
|
module Args
|
14
14
|
def self.slop(args)
|
15
15
|
slop = Slop.parse(args) do |o|
|
16
|
-
o.banner =
|
17
|
-
o.integer
|
18
|
-
o.string
|
19
|
-
o.integer
|
20
|
-
o.bool
|
21
|
-
o.bool
|
16
|
+
o.banner = "Usage: sinew [options] [recipe.sinew]"
|
17
|
+
o.integer "-l", "--limit", "quit after emitting this many rows"
|
18
|
+
o.string "--proxy", "use host[:port] as HTTP proxy (can be a comma-delimited list)"
|
19
|
+
o.integer "--timeout", "maximum time allowed for the transfer"
|
20
|
+
o.bool "-s", "--silent", "suppress some output"
|
21
|
+
o.bool "-v", "--verbose", "dump emitted rows while running"
|
22
22
|
|
23
|
-
o.separator
|
24
|
-
o.string
|
23
|
+
o.separator "From httpdisk:"
|
24
|
+
o.string "--dir", "set custom cache directory"
|
25
25
|
# note: uses slop_duration from HTTPDisk
|
26
|
-
o.duration
|
27
|
-
o.bool
|
28
|
-
o.bool
|
26
|
+
o.duration "--expires", "when to expire cached requests (ex: 1h, 2d, 3w)"
|
27
|
+
o.bool "--force", "don't read anything from cache (but still write)"
|
28
|
+
o.bool "--force-errors", "don't read errors from cache (but still write)"
|
29
29
|
|
30
30
|
# generic
|
31
|
-
o.boolean
|
31
|
+
o.boolean "--version", "show version" do
|
32
32
|
puts "sinew #{Sinew::VERSION}"
|
33
33
|
exit
|
34
34
|
end
|
35
|
-
o.on(
|
35
|
+
o.on("--help", "show this help") do
|
36
36
|
puts o
|
37
37
|
exit
|
38
38
|
end
|
@@ -40,9 +40,9 @@ module Sinew
|
|
40
40
|
|
41
41
|
# recipe argument
|
42
42
|
recipe = slop.args.first
|
43
|
-
raise Slop::Error,
|
44
|
-
raise Slop::Error,
|
45
|
-
raise Slop::Error,
|
43
|
+
raise Slop::Error, "" if args.empty?
|
44
|
+
raise Slop::Error, "no RECIPE specified" if !recipe
|
45
|
+
raise Slop::Error, "more than one RECIPE specified" if slop.args.length > 1
|
46
46
|
raise Slop::Error, "#{recipe} not found" if !File.exist?(recipe)
|
47
47
|
|
48
48
|
slop.to_h.tap do
|
data/lib/sinew/base.rb
CHANGED
@@ -1,8 +1,9 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require
|
1
|
+
require "amazing_print"
|
2
|
+
require "faraday-encoding"
|
3
|
+
require "faraday-rate_limiter"
|
4
|
+
require "faraday/logging/formatter"
|
5
|
+
require "faraday/retry"
|
6
|
+
require "httpdisk"
|
6
7
|
|
7
8
|
module Sinew
|
8
9
|
# Sinew base class, for in standalone scripts or via the sinew binary.
|
@@ -17,7 +18,7 @@ module Sinew
|
|
17
18
|
#
|
18
19
|
|
19
20
|
# default :rate_limit, typically 1
|
20
|
-
default_rate_limit = ENV[
|
21
|
+
default_rate_limit = ENV["SINEW_TEST"] ? 0 : 1
|
21
22
|
|
22
23
|
#
|
23
24
|
# note: uses HTTPDisk::Sloptions
|
@@ -32,7 +33,7 @@ module Sinew
|
|
32
33
|
_1.boolean :verbose
|
33
34
|
|
34
35
|
# httpdisk
|
35
|
-
_1.string :dir, default: File.join(ENV[
|
36
|
+
_1.string :dir, default: File.join(ENV["HOME"], ".sinew")
|
36
37
|
_1.integer :expires
|
37
38
|
_1.boolean :force
|
38
39
|
_1.boolean :force_errors
|
@@ -75,7 +76,7 @@ module Sinew
|
|
75
76
|
# http post json, returns a Response
|
76
77
|
def post_json(url, body = nil, headers = nil)
|
77
78
|
body = body.to_json
|
78
|
-
headers = (headers || {}).merge(
|
79
|
+
headers = (headers || {}).merge("Content-Type" => "application/json")
|
79
80
|
post(url, body, headers)
|
80
81
|
end
|
81
82
|
|
@@ -93,7 +94,7 @@ module Sinew
|
|
93
94
|
# Returns true if request is cached. Defaults to form body type.
|
94
95
|
def cached?(method, url, params = nil, body = nil)
|
95
96
|
status = status(method, url, params, body)
|
96
|
-
status[:status] !=
|
97
|
+
status[:status] != "miss"
|
97
98
|
end
|
98
99
|
|
99
100
|
# Remove cache file, if any. Defaults to form body type.
|
@@ -159,8 +160,8 @@ module Sinew
|
|
159
160
|
|
160
161
|
# Print a nice green banner.
|
161
162
|
def banner(msg, color: GREEN)
|
162
|
-
msg = "#{msg} ".ljust(72,
|
163
|
-
msg = "[#{Time.new.strftime(
|
163
|
+
msg = "#{msg} ".ljust(72, " ")
|
164
|
+
msg = "[#{Time.new.strftime("%H:%M:%S")}] #{msg}"
|
164
165
|
msg = "#{color}#{msg}#{RESET}" if $stdout.tty?
|
165
166
|
puts msg
|
166
167
|
end
|
@@ -178,7 +179,7 @@ module Sinew
|
|
178
179
|
return if !options[:proxy]
|
179
180
|
|
180
181
|
proxies = options[:proxy]
|
181
|
-
proxies = proxies.split(
|
182
|
+
proxies = proxies.split(",") if !proxies.is_a?(Array)
|
182
183
|
proxies.sample
|
183
184
|
end
|
184
185
|
|
@@ -186,7 +187,7 @@ module Sinew
|
|
186
187
|
def create_faraday
|
187
188
|
faraday_options = options.slice(:headers, :params)
|
188
189
|
if options[:insecure]
|
189
|
-
faraday_options[:ssl] = {
|
190
|
+
faraday_options[:ssl] = {verify: false}
|
190
191
|
end
|
191
192
|
Faraday.new(nil, faraday_options) do
|
192
193
|
# options
|
@@ -233,7 +234,7 @@ module Sinew
|
|
233
234
|
max: options[:retries],
|
234
235
|
methods: %w[delete get head options patch post put trace],
|
235
236
|
retry_statuses: (500..600).to_a,
|
236
|
-
retry_if: ->(_env, _err) { true }
|
237
|
+
retry_if: ->(_env, _err) { true }
|
237
238
|
}
|
238
239
|
_1.request :retry, retry_options
|
239
240
|
end
|
data/lib/sinew/csv.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require "csv"
|
2
|
+
require "sterile"
|
3
3
|
|
4
4
|
module Sinew
|
5
5
|
class CSV
|
@@ -13,11 +13,11 @@ module Sinew
|
|
13
13
|
|
14
14
|
# start writing the csv
|
15
15
|
def start(columns)
|
16
|
-
raise
|
16
|
+
raise "started twice" if started?
|
17
17
|
|
18
18
|
@columns = columns
|
19
19
|
@tally = columns.map { [_1, 0] }.to_h
|
20
|
-
@csv = ::CSV.open(path,
|
20
|
+
@csv = ::CSV.open(path, "wb").tap do
|
21
21
|
_1 << columns
|
22
22
|
end
|
23
23
|
end
|
@@ -50,7 +50,7 @@ module Sinew
|
|
50
50
|
end
|
51
51
|
|
52
52
|
ASCII_ONLY = begin
|
53
|
-
chars = (33..126).map(&:chr) - [
|
53
|
+
chars = (33..126).map(&:chr) - ["&"]
|
54
54
|
/\A[#{Regexp.escape(chars.join)}\s]+\Z/
|
55
55
|
end.freeze
|
56
56
|
|
@@ -59,14 +59,14 @@ module Sinew
|
|
59
59
|
s = if s.respond_to?(:inner_html)
|
60
60
|
s.inner_html
|
61
61
|
elsif s.is_a?(Array)
|
62
|
-
s.join(
|
62
|
+
s.join("|")
|
63
63
|
else
|
64
64
|
s.to_s
|
65
65
|
end
|
66
66
|
return if s.empty?
|
67
67
|
|
68
68
|
# simple attempt to strip tags. Note that we replace tags with spaces
|
69
|
-
s = s.gsub(/<[^>]+>/,
|
69
|
+
s = s.gsub(/<[^>]+>/, " ")
|
70
70
|
|
71
71
|
if s !~ ASCII_ONLY
|
72
72
|
# Converts MS Word 'smart punctuation' to ASCII
|
@@ -80,7 +80,7 @@ module Sinew
|
|
80
80
|
end
|
81
81
|
|
82
82
|
# squish
|
83
|
-
s = s.strip.gsub(/\s+/,
|
83
|
+
s = s.strip.gsub(/\s+/, " ")
|
84
84
|
return if s.empty?
|
85
85
|
|
86
86
|
s
|
data/lib/sinew/main.rb
CHANGED
@@ -8,7 +8,7 @@ module Sinew
|
|
8
8
|
options[:output] ||= begin
|
9
9
|
src = options[:recipe]
|
10
10
|
dst = File.join(File.dirname(src), "#{File.basename(src, File.extname(src))}.csv")
|
11
|
-
dst = dst.sub(%r{^./},
|
11
|
+
dst = dst.sub(%r{^./}, "") # nice to clean this up
|
12
12
|
dst
|
13
13
|
end
|
14
14
|
|
@@ -21,7 +21,7 @@ module Sinew
|
|
21
21
|
recipe = sinew.options[:recipe]
|
22
22
|
dsl = DSL.new(sinew)
|
23
23
|
begin
|
24
|
-
dsl.instance_eval(File.read(recipe, mode:
|
24
|
+
dsl.instance_eval(File.read(recipe, mode: "rb"), recipe)
|
25
25
|
rescue LimitError
|
26
26
|
# ignore - this is flow control for --limit
|
27
27
|
end
|
@@ -43,12 +43,12 @@ module Sinew
|
|
43
43
|
count = csv.count
|
44
44
|
|
45
45
|
if count == 0
|
46
|
-
sinew.banner(format(
|
46
|
+
sinew.banner(format("Done in %ds. Nothing written.", elapsed))
|
47
47
|
return
|
48
48
|
end
|
49
49
|
|
50
50
|
# summary
|
51
|
-
msg = format(
|
51
|
+
msg = format("Done in %ds. Wrote %d rows to %s. Summary:", elapsed, count, csv.path)
|
52
52
|
sinew.banner(msg)
|
53
53
|
|
54
54
|
# tally
|
data/lib/sinew/nokogiri_ext.rb
CHANGED
@@ -1,18 +1,18 @@
|
|
1
|
-
require
|
1
|
+
require "nokogiri"
|
2
2
|
|
3
3
|
# modify NodeSet to join with SPACE instead of empty string
|
4
4
|
module Nokogiri
|
5
5
|
module XML
|
6
6
|
class NodeSet
|
7
|
-
|
8
|
-
|
7
|
+
alias_method :old_inner_html, :inner_html
|
8
|
+
alias_method :old_inner_text, :inner_text
|
9
9
|
|
10
10
|
def inner_text
|
11
|
-
map(&:inner_text).join(
|
11
|
+
map(&:inner_text).join(" ")
|
12
12
|
end
|
13
13
|
|
14
14
|
def inner_html(*args)
|
15
|
-
map { _1.inner_html(*args) }.join(
|
15
|
+
map { _1.inner_html(*args) }.join(" ")
|
16
16
|
end
|
17
17
|
end
|
18
18
|
end
|
data/lib/sinew/response.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require
|
1
|
+
require "delegate"
|
2
|
+
require "hashie/mash"
|
3
|
+
require "json"
|
4
|
+
require "nokogiri"
|
5
5
|
|
6
6
|
module Sinew
|
7
7
|
# A wrapper around Faraday::Response, with some parsing helpers.
|
@@ -11,12 +11,12 @@ module Sinew
|
|
11
11
|
@html ||= body.dup.tap do
|
12
12
|
# fix invalid utf8
|
13
13
|
if _1.encoding == Encoding::UTF_8
|
14
|
-
_1.encode!(
|
14
|
+
_1.encode!("UTF-8", invalid: :replace, undef: :replace, replace: "?")
|
15
15
|
end
|
16
16
|
|
17
17
|
# squish
|
18
18
|
_1.strip!
|
19
|
-
_1.gsub!(/\s+/,
|
19
|
+
_1.gsub!(/\s+/, " ")
|
20
20
|
|
21
21
|
# kill whitespace around tags
|
22
22
|
_1.gsub!(/ ?<([^>]+)> ?/, '<\\1>')
|
data/lib/sinew/version.rb
CHANGED
data/lib/sinew.rb
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
# sinew
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require
|
7
|
-
require
|
8
|
-
require
|
2
|
+
require "sinew/args"
|
3
|
+
require "sinew/base"
|
4
|
+
require "sinew/csv"
|
5
|
+
require "sinew/main"
|
6
|
+
require "sinew/nokogiri_ext"
|
7
|
+
require "sinew/response"
|
8
|
+
require "sinew/version"
|
9
9
|
|
10
10
|
# custom faraday middleware
|
11
|
-
require
|
11
|
+
require "sinew/middleware/log_formatter"
|
12
12
|
|
13
13
|
module Sinew
|
14
14
|
# flow control for --limit
|
data/sample.rb
CHANGED
@@ -1,13 +1,13 @@
|
|
1
|
-
require_relative
|
1
|
+
require_relative "lib/sinew"
|
2
2
|
|
3
|
-
sinew = Sinew.new(output:
|
3
|
+
sinew = Sinew.new(output: "sample.csv", verbose: true)
|
4
4
|
|
5
|
-
response = sinew.get
|
6
|
-
response.noko.css(
|
5
|
+
response = sinew.get "http://httpbingo.org"
|
6
|
+
response.noko.css("ul li a").each do |a|
|
7
7
|
row = {}
|
8
8
|
row[:url] = a[:href]
|
9
9
|
row[:title] = a.text
|
10
10
|
sinew.csv_emit(row)
|
11
11
|
end
|
12
12
|
|
13
|
-
sinew.get
|
13
|
+
sinew.get "http://httpbingo.org/redirect/2"
|
data/sinew.gemspec
CHANGED
@@ -1,35 +1,36 @@
|
|
1
1
|
$LOAD_PATH.unshift("#{__dir__}/lib")
|
2
2
|
|
3
|
-
require
|
3
|
+
require "sinew/version"
|
4
4
|
|
5
5
|
Gem::Specification.new do |s|
|
6
|
-
s.name =
|
6
|
+
s.name = "sinew"
|
7
7
|
s.version = Sinew::VERSION
|
8
|
-
s.authors = [
|
9
|
-
s.email = [
|
8
|
+
s.authors = ["Adam Doppelt", "Nathan Kriege"]
|
9
|
+
s.email = ["amd@gurge.com"]
|
10
10
|
|
11
|
-
s.summary =
|
12
|
-
s.description =
|
13
|
-
s.homepage =
|
14
|
-
s.license =
|
15
|
-
s.required_ruby_version =
|
11
|
+
s.summary = "Sinew - structured web crawling using recipes."
|
12
|
+
s.description = "Crawl web sites easily using ruby recipes, with caching and nokogiri."
|
13
|
+
s.homepage = "http://github.com/gurgeous/sinew"
|
14
|
+
s.license = "MIT"
|
15
|
+
s.required_ruby_version = ">= 3.1"
|
16
16
|
|
17
17
|
# what's in the gem?
|
18
18
|
s.files = Dir.chdir(File.expand_path(__dir__)) do
|
19
19
|
`git ls-files -z`.split("\x0").reject { _1.match(%r{^test/}) }
|
20
20
|
end
|
21
|
-
s.bindir =
|
21
|
+
s.bindir = "bin"
|
22
22
|
s.executables = s.files.grep(%r{^#{s.bindir}/}) { File.basename(_1) }
|
23
|
-
s.require_paths = [
|
23
|
+
s.require_paths = ["lib"]
|
24
24
|
|
25
25
|
# gem dependencies
|
26
|
-
s.add_dependency
|
27
|
-
s.add_dependency
|
28
|
-
s.add_dependency
|
29
|
-
s.add_dependency
|
30
|
-
s.add_dependency
|
31
|
-
s.add_dependency
|
32
|
-
s.add_dependency
|
33
|
-
s.add_dependency
|
34
|
-
s.add_dependency
|
26
|
+
s.add_dependency "amazing_print", "~> 1.5"
|
27
|
+
s.add_dependency "faraday", "~> 2.7"
|
28
|
+
s.add_dependency "faraday-encoding", "~> 0.0"
|
29
|
+
s.add_dependency "faraday-rate_limiter", "~> 0.0"
|
30
|
+
s.add_dependency "faraday-retry", "~> 2.0"
|
31
|
+
s.add_dependency "hashie", "~> 5.0"
|
32
|
+
s.add_dependency "httpdisk", "~> 1.0"
|
33
|
+
s.add_dependency "nokogiri", "~> 1.15"
|
34
|
+
s.add_dependency "slop", "~> 4.10"
|
35
|
+
s.add_dependency "sterile", "~> 1.0"
|
35
36
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sinew
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.0.
|
4
|
+
version: 4.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Adam Doppelt
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2023-08-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: amazing_print
|
@@ -17,42 +17,42 @@ dependencies:
|
|
17
17
|
requirements:
|
18
18
|
- - "~>"
|
19
19
|
- !ruby/object:Gem::Version
|
20
|
-
version: '1.
|
20
|
+
version: '1.5'
|
21
21
|
type: :runtime
|
22
22
|
prerelease: false
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
24
24
|
requirements:
|
25
25
|
- - "~>"
|
26
26
|
- !ruby/object:Gem::Version
|
27
|
-
version: '1.
|
27
|
+
version: '1.5'
|
28
28
|
- !ruby/object:Gem::Dependency
|
29
29
|
name: faraday
|
30
30
|
requirement: !ruby/object:Gem::Requirement
|
31
31
|
requirements:
|
32
32
|
- - "~>"
|
33
33
|
- !ruby/object:Gem::Version
|
34
|
-
version: '
|
34
|
+
version: '2.7'
|
35
35
|
type: :runtime
|
36
36
|
prerelease: false
|
37
37
|
version_requirements: !ruby/object:Gem::Requirement
|
38
38
|
requirements:
|
39
39
|
- - "~>"
|
40
40
|
- !ruby/object:Gem::Version
|
41
|
-
version: '
|
41
|
+
version: '2.7'
|
42
42
|
- !ruby/object:Gem::Dependency
|
43
43
|
name: faraday-encoding
|
44
44
|
requirement: !ruby/object:Gem::Requirement
|
45
45
|
requirements:
|
46
46
|
- - "~>"
|
47
47
|
- !ruby/object:Gem::Version
|
48
|
-
version: '0'
|
48
|
+
version: '0.0'
|
49
49
|
type: :runtime
|
50
50
|
prerelease: false
|
51
51
|
version_requirements: !ruby/object:Gem::Requirement
|
52
52
|
requirements:
|
53
53
|
- - "~>"
|
54
54
|
- !ruby/object:Gem::Version
|
55
|
-
version: '0'
|
55
|
+
version: '0.0'
|
56
56
|
- !ruby/object:Gem::Dependency
|
57
57
|
name: faraday-rate_limiter
|
58
58
|
requirement: !ruby/object:Gem::Requirement
|
@@ -67,62 +67,76 @@ dependencies:
|
|
67
67
|
- - "~>"
|
68
68
|
- !ruby/object:Gem::Version
|
69
69
|
version: '0.0'
|
70
|
+
- !ruby/object:Gem::Dependency
|
71
|
+
name: faraday-retry
|
72
|
+
requirement: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - "~>"
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '2.0'
|
77
|
+
type: :runtime
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - "~>"
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '2.0'
|
70
84
|
- !ruby/object:Gem::Dependency
|
71
85
|
name: hashie
|
72
86
|
requirement: !ruby/object:Gem::Requirement
|
73
87
|
requirements:
|
74
88
|
- - "~>"
|
75
89
|
- !ruby/object:Gem::Version
|
76
|
-
version: '
|
90
|
+
version: '5.0'
|
77
91
|
type: :runtime
|
78
92
|
prerelease: false
|
79
93
|
version_requirements: !ruby/object:Gem::Requirement
|
80
94
|
requirements:
|
81
95
|
- - "~>"
|
82
96
|
- !ruby/object:Gem::Version
|
83
|
-
version: '
|
97
|
+
version: '5.0'
|
84
98
|
- !ruby/object:Gem::Dependency
|
85
99
|
name: httpdisk
|
86
100
|
requirement: !ruby/object:Gem::Requirement
|
87
101
|
requirements:
|
88
102
|
- - "~>"
|
89
103
|
- !ruby/object:Gem::Version
|
90
|
-
version: '0
|
104
|
+
version: '1.0'
|
91
105
|
type: :runtime
|
92
106
|
prerelease: false
|
93
107
|
version_requirements: !ruby/object:Gem::Requirement
|
94
108
|
requirements:
|
95
109
|
- - "~>"
|
96
110
|
- !ruby/object:Gem::Version
|
97
|
-
version: '0
|
111
|
+
version: '1.0'
|
98
112
|
- !ruby/object:Gem::Dependency
|
99
113
|
name: nokogiri
|
100
114
|
requirement: !ruby/object:Gem::Requirement
|
101
115
|
requirements:
|
102
116
|
- - "~>"
|
103
117
|
- !ruby/object:Gem::Version
|
104
|
-
version: '1.
|
118
|
+
version: '1.15'
|
105
119
|
type: :runtime
|
106
120
|
prerelease: false
|
107
121
|
version_requirements: !ruby/object:Gem::Requirement
|
108
122
|
requirements:
|
109
123
|
- - "~>"
|
110
124
|
- !ruby/object:Gem::Version
|
111
|
-
version: '1.
|
125
|
+
version: '1.15'
|
112
126
|
- !ruby/object:Gem::Dependency
|
113
127
|
name: slop
|
114
128
|
requirement: !ruby/object:Gem::Requirement
|
115
129
|
requirements:
|
116
130
|
- - "~>"
|
117
131
|
- !ruby/object:Gem::Version
|
118
|
-
version: '4.
|
132
|
+
version: '4.10'
|
119
133
|
type: :runtime
|
120
134
|
prerelease: false
|
121
135
|
version_requirements: !ruby/object:Gem::Requirement
|
122
136
|
requirements:
|
123
137
|
- - "~>"
|
124
138
|
- !ruby/object:Gem::Version
|
125
|
-
version: '4.
|
139
|
+
version: '4.10'
|
126
140
|
- !ruby/object:Gem::Dependency
|
127
141
|
name: sterile
|
128
142
|
requirement: !ruby/object:Gem::Requirement
|
@@ -154,6 +168,7 @@ files:
|
|
154
168
|
- README.md
|
155
169
|
- Rakefile
|
156
170
|
- bin/sinew
|
171
|
+
- justfile
|
157
172
|
- lib/sinew.rb
|
158
173
|
- lib/sinew/args.rb
|
159
174
|
- lib/sinew/base.rb
|
@@ -178,14 +193,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
178
193
|
requirements:
|
179
194
|
- - ">="
|
180
195
|
- !ruby/object:Gem::Version
|
181
|
-
version: '
|
196
|
+
version: '3.1'
|
182
197
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
183
198
|
requirements:
|
184
199
|
- - ">="
|
185
200
|
- !ruby/object:Gem::Version
|
186
201
|
version: '0'
|
187
202
|
requirements: []
|
188
|
-
rubygems_version: 3.
|
203
|
+
rubygems_version: 3.3.7
|
189
204
|
signing_key:
|
190
205
|
specification_version: 4
|
191
206
|
summary: Sinew - structured web crawling using recipes.
|