stream_lines 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci-tests.yml +30 -0
- data/Gemfile.lock +38 -31
- data/README.md +4 -4
- data/bin/console +2 -0
- data/lib/stream_lines/reading/csv.rb +3 -1
- data/lib/stream_lines/reading/json_lines.rb +2 -2
- data/lib/stream_lines/reading/stream.rb +27 -12
- data/lib/stream_lines/version.rb +1 -1
- data/stream_lines.gemspec +2 -1
- metadata +19 -5
- data/.travis.yml +0 -15
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9e832d9790c71431c43722f116cca885d3fee985001b12f5d0c53cc17e78c7b8
|
|
4
|
+
data.tar.gz: 76338bc7e203bfa59f3aed2469db3694951dfd5905c6bcfe416b08f1512f3289
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 3d980bdddd41227418da10447e57599ee94cea52b7ea926e1cb3f0efd8a3b71b7e8f44c70ed7a62dc37ea31ac97e84a1f5d20677ecb933bd26be7c2348e533c5
|
|
7
|
+
data.tar.gz: 721610a59f59862772b0ea259b6f22c0e84f058d458538ae993478324e9e315dc6fa0b469eaebb56fd630f59bd43b3bb43956f2111855c22cb00bb744296018c
|
data/.github/workflows/ci-tests.yml
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# This workflow uses actions that are not certified by GitHub.
|
|
2
|
+
# They are provided by a third-party and are governed by
|
|
3
|
+
# separate terms of service, privacy policy, and support
|
|
4
|
+
# documentation.
|
|
5
|
+
# This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake
|
|
6
|
+
# For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby
|
|
7
|
+
|
|
8
|
+
name: Tests
|
|
9
|
+
|
|
10
|
+
on:
|
|
11
|
+
push:
|
|
12
|
+
branches:
|
|
13
|
+
- main
|
|
14
|
+
pull_request:
|
|
15
|
+
branches:
|
|
16
|
+
- '*'
|
|
17
|
+
|
|
18
|
+
jobs:
|
|
19
|
+
test:
|
|
20
|
+
runs-on: ubuntu-latest
|
|
21
|
+
steps:
|
|
22
|
+
- uses: actions/checkout@v2
|
|
23
|
+
- name: Set up Ruby
|
|
24
|
+
uses: ruby/setup-ruby@v1
|
|
25
|
+
with:
|
|
26
|
+
ruby-version: 2.6
|
|
27
|
+
- name: Install dependencies
|
|
28
|
+
run: bundle install
|
|
29
|
+
- name: Run tests
|
|
30
|
+
run: bundle exec rake
|
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
stream_lines (0.3.1)
|
|
4
|
+
stream_lines (0.4.0)
|
|
5
5
|
httparty (~> 0.14)
|
|
6
6
|
|
|
7
7
|
GEM
|
|
@@ -9,70 +9,76 @@ GEM
|
|
|
9
9
|
specs:
|
|
10
10
|
addressable (2.7.0)
|
|
11
11
|
public_suffix (>= 2.0.2, < 5.0)
|
|
12
|
-
ast (2.4.
|
|
12
|
+
ast (2.4.1)
|
|
13
13
|
awesome_print (1.8.0)
|
|
14
|
-
backports (3.
|
|
15
|
-
bundler-audit (0.
|
|
14
|
+
backports (3.17.2)
|
|
15
|
+
bundler-audit (0.7.0.1)
|
|
16
16
|
bundler (>= 1.2.0, < 3)
|
|
17
|
-
thor (
|
|
17
|
+
thor (>= 0.18, < 2)
|
|
18
18
|
bundler-gem_version_tasks (0.2.1)
|
|
19
|
-
byebug (11.1.
|
|
19
|
+
byebug (11.1.3)
|
|
20
|
+
charlock_holmes (0.7.7)
|
|
20
21
|
crack (0.4.3)
|
|
21
22
|
safe_yaml (~> 1.0.0)
|
|
22
23
|
diff-lcs (1.3)
|
|
23
24
|
docile (1.3.2)
|
|
24
|
-
ffi (1.
|
|
25
|
+
ffi (1.13.0)
|
|
25
26
|
get_process_mem (0.2.5)
|
|
26
27
|
ffi (~> 1.0)
|
|
27
|
-
hashdiff (1.0.
|
|
28
|
-
httparty (0.
|
|
28
|
+
hashdiff (1.0.1)
|
|
29
|
+
httparty (0.18.1)
|
|
29
30
|
mime-types (~> 3.0)
|
|
30
31
|
multi_xml (>= 0.5.2)
|
|
31
|
-
jaro_winkler (1.5.4)
|
|
32
32
|
memory_profiler (0.9.14)
|
|
33
33
|
mime-types (3.3.1)
|
|
34
34
|
mime-types-data (~> 3.2015)
|
|
35
|
-
mime-types-data (3.
|
|
35
|
+
mime-types-data (3.2020.0512)
|
|
36
36
|
multi_json (1.14.1)
|
|
37
37
|
multi_xml (0.6.0)
|
|
38
38
|
mustermann (1.1.1)
|
|
39
39
|
ruby2_keywords (~> 0.0.1)
|
|
40
|
-
parallel (1.19.
|
|
41
|
-
parser (2.7.
|
|
42
|
-
ast (~> 2.4.
|
|
43
|
-
public_suffix (4.0.
|
|
44
|
-
rack (2.
|
|
40
|
+
parallel (1.19.2)
|
|
41
|
+
parser (2.7.1.4)
|
|
42
|
+
ast (~> 2.4.1)
|
|
43
|
+
public_suffix (4.0.5)
|
|
44
|
+
rack (2.2.3)
|
|
45
45
|
rack-protection (2.0.8.1)
|
|
46
46
|
rack
|
|
47
47
|
rainbow (3.0.0)
|
|
48
48
|
rake (13.0.1)
|
|
49
|
+
regexp_parser (1.7.1)
|
|
50
|
+
rexml (3.2.4)
|
|
49
51
|
rspec (3.9.0)
|
|
50
52
|
rspec-core (~> 3.9.0)
|
|
51
53
|
rspec-expectations (~> 3.9.0)
|
|
52
54
|
rspec-mocks (~> 3.9.0)
|
|
53
|
-
rspec-core (3.9.
|
|
54
|
-
rspec-support (~> 3.9.
|
|
55
|
-
rspec-expectations (3.9.
|
|
55
|
+
rspec-core (3.9.2)
|
|
56
|
+
rspec-support (~> 3.9.3)
|
|
57
|
+
rspec-expectations (3.9.2)
|
|
56
58
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
57
59
|
rspec-support (~> 3.9.0)
|
|
58
60
|
rspec-mocks (3.9.1)
|
|
59
61
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
60
62
|
rspec-support (~> 3.9.0)
|
|
61
|
-
rspec-support (3.9.
|
|
62
|
-
rubocop (0.
|
|
63
|
-
jaro_winkler (~> 1.5.1)
|
|
63
|
+
rspec-support (3.9.3)
|
|
64
|
+
rubocop (0.87.1)
|
|
64
65
|
parallel (~> 1.10)
|
|
65
|
-
parser (>= 2.7.
|
|
66
|
+
parser (>= 2.7.1.1)
|
|
66
67
|
rainbow (>= 2.2.2, < 4.0)
|
|
68
|
+
regexp_parser (>= 1.7)
|
|
69
|
+
rexml
|
|
70
|
+
rubocop-ast (>= 0.1.0, < 1.0)
|
|
67
71
|
ruby-progressbar (~> 1.7)
|
|
68
|
-
unicode-display_width (>= 1.4.0, <
|
|
72
|
+
unicode-display_width (>= 1.4.0, < 2.0)
|
|
73
|
+
rubocop-ast (0.1.0)
|
|
74
|
+
parser (>= 2.7.0.1)
|
|
69
75
|
ruby-progressbar (1.10.1)
|
|
70
76
|
ruby2_keywords (0.0.2)
|
|
71
77
|
safe_yaml (1.0.5)
|
|
72
|
-
simplecov (0.18.
|
|
78
|
+
simplecov (0.18.5)
|
|
73
79
|
docile (~> 1.1)
|
|
74
|
-
simplecov-html (~> 0.11
|
|
75
|
-
simplecov-html (0.
|
|
80
|
+
simplecov-html (~> 0.11)
|
|
81
|
+
simplecov-html (0.12.2)
|
|
76
82
|
sinatra (2.0.8.1)
|
|
77
83
|
mustermann (~> 1.0)
|
|
78
84
|
rack (~> 2.0)
|
|
@@ -85,10 +91,10 @@ GEM
|
|
|
85
91
|
rack-protection (= 2.0.8.1)
|
|
86
92
|
sinatra (= 2.0.8.1)
|
|
87
93
|
tilt (~> 2.0)
|
|
88
|
-
thor (0.
|
|
94
|
+
thor (1.0.1)
|
|
89
95
|
tilt (2.0.10)
|
|
90
|
-
unicode-display_width (1.
|
|
91
|
-
webmock (3.8.
|
|
96
|
+
unicode-display_width (1.7.0)
|
|
97
|
+
webmock (3.8.3)
|
|
92
98
|
addressable (>= 2.3.6)
|
|
93
99
|
crack (>= 0.3.2)
|
|
94
100
|
hashdiff (>= 0.4.0, < 2.0.0)
|
|
@@ -102,11 +108,12 @@ DEPENDENCIES
|
|
|
102
108
|
bundler-audit
|
|
103
109
|
bundler-gem_version_tasks
|
|
104
110
|
byebug
|
|
111
|
+
charlock_holmes
|
|
105
112
|
get_process_mem
|
|
106
113
|
memory_profiler
|
|
107
114
|
rake (~> 13.0)
|
|
108
115
|
rspec (~> 3.0)
|
|
109
|
-
rubocop (~> 0.
|
|
116
|
+
rubocop (~> 0.87.1)
|
|
110
117
|
simplecov (~> 0.17)
|
|
111
118
|
sinatra (~> 2.0)
|
|
112
119
|
sinatra-contrib (~> 2.0)
|
data/README.md
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# StreamLines
|
|
2
2
|
|
|
3
3
|
[![Gem Version](https://badge.fury.io/rb/stream_lines.svg)](https://badge.fury.io/rb/stream_lines)
|
|
4
|
-
|
|
4
|
+

|
|
5
5
|
|
|
6
6
|
An API for streaming files from remote locations one line at a time.
|
|
7
7
|
|
|
@@ -119,10 +119,10 @@ To install this gem onto your local machine, run `bundle exec rake install`.
|
|
|
119
119
|
|
|
120
120
|
## Releasing
|
|
121
121
|
|
|
122
|
-
After merging in the new functionality to the master branch:
|
|
122
|
+
After merging in the new functionality to the main branch:
|
|
123
123
|
|
|
124
124
|
```
|
|
125
|
-
git checkout master
|
|
125
|
+
git checkout main
|
|
126
126
|
git pull --prune
|
|
127
127
|
bundle exec rake version:bump:<major, minor, or patch>
|
|
128
128
|
bundle exec rubocop -a
|
|
@@ -135,7 +135,7 @@ bundle exec rake release
|
|
|
135
135
|
Bug reports and pull requests are welcome on GitHub at
|
|
136
136
|
https://github.com/jdlubrano/stream_lines. This project is intended to be a
|
|
137
137
|
safe, welcoming space for collaboration, and contributors are expected to
|
|
138
|
-
adhere to the [code of conduct](https://github.com/jdlubrano/stream_lines/blob/master/CODE_OF_CONDUCT.md).
|
|
138
|
+
adhere to the [code of conduct](https://github.com/jdlubrano/stream_lines/blob/main/CODE_OF_CONDUCT.md).
|
|
139
139
|
|
|
140
140
|
## License
|
|
141
141
|
|
data/bin/console
CHANGED
|
@@ -3,7 +3,9 @@
|
|
|
3
3
|
# frozen_string_literal: true
|
|
4
4
|
|
|
5
5
|
require 'bundler/setup'
|
|
6
|
+
require 'byebug'
|
|
6
7
|
require 'stream_lines'
|
|
8
|
+
require 'charlock_holmes/string'
|
|
7
9
|
|
|
8
10
|
# You can add fixtures and/or initialization code here to make experimenting
|
|
9
11
|
# with your gem easier. You can also use a different console, if you like.
|
data/lib/stream_lines/reading/csv.rb
CHANGED
|
@@ -23,7 +23,9 @@ module StreamLines
|
|
|
23
23
|
def initialize(url, **csv_options)
|
|
24
24
|
@url = url
|
|
25
25
|
@csv_options = accepted_csv_options(csv_options)
|
|
26
|
-
|
|
26
|
+
|
|
27
|
+
encoding = @csv_options[:encoding] || Encoding.default_external
|
|
28
|
+
@stream = Stream.new(url, encoding: encoding)
|
|
27
29
|
end
|
|
28
30
|
|
|
29
31
|
def each(&block)
|
data/lib/stream_lines/reading/json_lines.rb
CHANGED
|
@@ -8,10 +8,10 @@ module StreamLines
|
|
|
8
8
|
class JSONLines
|
|
9
9
|
include Enumerable
|
|
10
10
|
|
|
11
|
-
def initialize(url, **json_options)
|
|
11
|
+
def initialize(url, encoding: Encoding.default_external, **json_options)
|
|
12
12
|
@url = url
|
|
13
13
|
@json_options = json_options
|
|
14
|
-
@stream = Stream.new(url)
|
|
14
|
+
@stream = Stream.new(url, encoding: encoding)
|
|
15
15
|
end
|
|
16
16
|
|
|
17
17
|
def each(&block)
|
data/lib/stream_lines/reading/stream.rb
CHANGED
|
@@ -12,9 +12,10 @@ module StreamLines
|
|
|
12
12
|
|
|
13
13
|
raise_on 400..599
|
|
14
14
|
|
|
15
|
-
def initialize(url)
|
|
15
|
+
def initialize(url, encoding: Encoding.default_external)
|
|
16
16
|
@url = url
|
|
17
|
-
@buffer = StringIO.new
|
|
17
|
+
@encoding = encoding
|
|
18
|
+
@buffer = String.new(encoding: @encoding)
|
|
18
19
|
end
|
|
19
20
|
|
|
20
21
|
def each(&block)
|
|
@@ -33,22 +34,36 @@ module StreamLines
|
|
|
33
34
|
lines.each { |line| block.call(line) }
|
|
34
35
|
end
|
|
35
36
|
|
|
36
|
-
@buffer.rewind
|
|
37
|
-
block.call(@buffer.read) if @buffer.size.positive?
|
|
37
|
+
block.call(@buffer) if @buffer.size.positive?
|
|
38
38
|
end
|
|
39
39
|
|
|
40
40
|
def extract_lines(chunk)
|
|
41
|
-
|
|
41
|
+
encoded_chunk = @buffer + chunk.to_s.dup.force_encoding(@encoding)
|
|
42
|
+
lines = split_lines(encoded_chunk)
|
|
43
|
+
@buffer = String.new(encoding: @encoding)
|
|
44
|
+
@buffer << lines.pop.to_s
|
|
42
45
|
|
|
43
|
-
if lines.length > 1
|
|
44
|
-
@buffer.rewind
|
|
45
|
-
lines.first.prepend(@buffer.read)
|
|
46
|
-
@buffer = StringIO.new
|
|
47
|
-
end
|
|
48
|
-
|
|
49
|
-
@buffer << lines.pop
|
|
50
46
|
lines
|
|
51
47
|
end
|
|
48
|
+
|
|
49
|
+
def split_lines(encoded_chunk)
|
|
50
|
+
encoded_chunk.split($INPUT_RECORD_SEPARATOR, -1)
|
|
51
|
+
rescue ArgumentError => e
|
|
52
|
+
raise e unless /invalid byte sequence/.match?(e.message)
|
|
53
|
+
|
|
54
|
+
# NOTE: (jdlubrano)
|
|
55
|
+
# The last byte in the chunk is most likely a part of a multibyte
|
|
56
|
+
# character that, on its own, is an invalid byte sequence. So, we
|
|
57
|
+
# want to split the lines containing all valid bytes and make the
|
|
58
|
+
# trailing bytes the last line. The last line eventually gets added
|
|
59
|
+
# to the buffer, prepended to the next chunk, and, hopefully, restores
|
|
60
|
+
# a valid byte sequence.
|
|
61
|
+
last_newline_index = encoded_chunk.rindex($INPUT_RECORD_SEPARATOR)
|
|
62
|
+
return [encoded_chunk] if last_newline_index.nil?
|
|
63
|
+
|
|
64
|
+
valid_lines = encoded_chunk[0...last_newline_index].split($INPUT_RECORD_SEPARATOR, -1)
|
|
65
|
+
valid_lines + [encoded_chunk[(last_newline_index + 1)..-1]].compact
|
|
66
|
+
end
|
|
52
67
|
end
|
|
53
68
|
end
|
|
54
69
|
end
|
data/lib/stream_lines/version.rb
CHANGED
data/stream_lines.gemspec
CHANGED
|
@@ -33,11 +33,12 @@ Gem::Specification.new do |spec|
|
|
|
33
33
|
spec.add_development_dependency 'bundler-audit'
|
|
34
34
|
spec.add_development_dependency 'bundler-gem_version_tasks'
|
|
35
35
|
spec.add_development_dependency 'byebug'
|
|
36
|
+
spec.add_development_dependency 'charlock_holmes'
|
|
36
37
|
spec.add_development_dependency 'get_process_mem'
|
|
37
38
|
spec.add_development_dependency 'memory_profiler'
|
|
38
39
|
spec.add_development_dependency 'rake', '~> 13.0'
|
|
39
40
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
|
40
|
-
spec.add_development_dependency 'rubocop', '~> 0.
|
|
41
|
+
spec.add_development_dependency 'rubocop', '~> 0.87.1'
|
|
41
42
|
spec.add_development_dependency 'simplecov', '~> 0.17'
|
|
42
43
|
spec.add_development_dependency 'sinatra', '~> 2.0'
|
|
43
44
|
spec.add_development_dependency 'sinatra-contrib', '~> 2.0'
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: stream_lines
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.1
|
|
4
|
+
version: 0.4.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Joel Lubrano
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2020-
|
|
11
|
+
date: 2020-07-19 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: httparty
|
|
@@ -94,6 +94,20 @@ dependencies:
|
|
|
94
94
|
- - ">="
|
|
95
95
|
- !ruby/object:Gem::Version
|
|
96
96
|
version: '0'
|
|
97
|
+
- !ruby/object:Gem::Dependency
|
|
98
|
+
name: charlock_holmes
|
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
|
100
|
+
requirements:
|
|
101
|
+
- - ">="
|
|
102
|
+
- !ruby/object:Gem::Version
|
|
103
|
+
version: '0'
|
|
104
|
+
type: :development
|
|
105
|
+
prerelease: false
|
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
107
|
+
requirements:
|
|
108
|
+
- - ">="
|
|
109
|
+
- !ruby/object:Gem::Version
|
|
110
|
+
version: '0'
|
|
97
111
|
- !ruby/object:Gem::Dependency
|
|
98
112
|
name: get_process_mem
|
|
99
113
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -156,14 +170,14 @@ dependencies:
|
|
|
156
170
|
requirements:
|
|
157
171
|
- - "~>"
|
|
158
172
|
- !ruby/object:Gem::Version
|
|
159
|
-
version: 0.
|
|
173
|
+
version: 0.87.1
|
|
160
174
|
type: :development
|
|
161
175
|
prerelease: false
|
|
162
176
|
version_requirements: !ruby/object:Gem::Requirement
|
|
163
177
|
requirements:
|
|
164
178
|
- - "~>"
|
|
165
179
|
- !ruby/object:Gem::Version
|
|
166
|
-
version: 0.
|
|
180
|
+
version: 0.87.1
|
|
167
181
|
- !ruby/object:Gem::Dependency
|
|
168
182
|
name: simplecov
|
|
169
183
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -227,12 +241,12 @@ executables: []
|
|
|
227
241
|
extensions: []
|
|
228
242
|
extra_rdoc_files: []
|
|
229
243
|
files:
|
|
244
|
+
- ".github/workflows/ci-tests.yml"
|
|
230
245
|
- ".gitignore"
|
|
231
246
|
- ".rspec"
|
|
232
247
|
- ".rubocop.yml"
|
|
233
248
|
- ".ruby-gemset"
|
|
234
249
|
- ".ruby-version"
|
|
235
|
-
- ".travis.yml"
|
|
236
250
|
- CODE_OF_CONDUCT.md
|
|
237
251
|
- Gemfile
|
|
238
252
|
- Gemfile.lock
|
data/.travis.yml
DELETED
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
language: ruby
|
|
3
|
-
cache: bundler
|
|
4
|
-
rvm:
|
|
5
|
-
- 2.7.0
|
|
6
|
-
- 2.6.0
|
|
7
|
-
- 2.5.0
|
|
8
|
-
before_install: gem install bundler -v 2.0.2
|
|
9
|
-
script:
|
|
10
|
-
- bundle exec bundle-audit check --update
|
|
11
|
-
- bundle exec rubocop
|
|
12
|
-
- bundle exec rspec
|
|
13
|
-
branches:
|
|
14
|
-
only:
|
|
15
|
-
- master
|