stream_lines 0.3.1 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ci-tests.yml +30 -0
- data/Gemfile.lock +38 -31
- data/README.md +4 -4
- data/bin/console +2 -0
- data/lib/stream_lines/reading/csv.rb +3 -1
- data/lib/stream_lines/reading/json_lines.rb +2 -2
- data/lib/stream_lines/reading/stream.rb +27 -12
- data/lib/stream_lines/version.rb +1 -1
- data/stream_lines.gemspec +2 -1
- metadata +19 -5
- data/.travis.yml +0 -15
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9e832d9790c71431c43722f116cca885d3fee985001b12f5d0c53cc17e78c7b8
|
4
|
+
data.tar.gz: 76338bc7e203bfa59f3aed2469db3694951dfd5905c6bcfe416b08f1512f3289
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3d980bdddd41227418da10447e57599ee94cea52b7ea926e1cb3f0efd8a3b71b7e8f44c70ed7a62dc37ea31ac97e84a1f5d20677ecb933bd26be7c2348e533c5
|
7
|
+
data.tar.gz: 721610a59f59862772b0ea259b6f22c0e84f058d458538ae993478324e9e315dc6fa0b469eaebb56fd630f59bd43b3bb43956f2111855c22cb00bb744296018c
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# This workflow uses actions that are not certified by GitHub.
|
2
|
+
# They are provided by a third-party and are governed by
|
3
|
+
# separate terms of service, privacy policy, and support
|
4
|
+
# documentation.
|
5
|
+
# This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake
|
6
|
+
# For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby
|
7
|
+
|
8
|
+
name: Tests
|
9
|
+
|
10
|
+
on:
|
11
|
+
push:
|
12
|
+
branches:
|
13
|
+
- main
|
14
|
+
pull_request:
|
15
|
+
branches:
|
16
|
+
- '*'
|
17
|
+
|
18
|
+
jobs:
|
19
|
+
test:
|
20
|
+
runs-on: ubuntu-latest
|
21
|
+
steps:
|
22
|
+
- uses: actions/checkout@v2
|
23
|
+
- name: Set up Ruby
|
24
|
+
uses: ruby/setup-ruby@v1
|
25
|
+
with:
|
26
|
+
ruby-version: 2.6
|
27
|
+
- name: Install dependencies
|
28
|
+
run: bundle install
|
29
|
+
- name: Run tests
|
30
|
+
run: bundle exec rake
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
stream_lines (0.3.1)
|
4
|
+
stream_lines (0.4.0)
|
5
5
|
httparty (~> 0.14)
|
6
6
|
|
7
7
|
GEM
|
@@ -9,70 +9,76 @@ GEM
|
|
9
9
|
specs:
|
10
10
|
addressable (2.7.0)
|
11
11
|
public_suffix (>= 2.0.2, < 5.0)
|
12
|
-
ast (2.4.
|
12
|
+
ast (2.4.1)
|
13
13
|
awesome_print (1.8.0)
|
14
|
-
backports (3.
|
15
|
-
bundler-audit (0.
|
14
|
+
backports (3.17.2)
|
15
|
+
bundler-audit (0.7.0.1)
|
16
16
|
bundler (>= 1.2.0, < 3)
|
17
|
-
thor (
|
17
|
+
thor (>= 0.18, < 2)
|
18
18
|
bundler-gem_version_tasks (0.2.1)
|
19
|
-
byebug (11.1.
|
19
|
+
byebug (11.1.3)
|
20
|
+
charlock_holmes (0.7.7)
|
20
21
|
crack (0.4.3)
|
21
22
|
safe_yaml (~> 1.0.0)
|
22
23
|
diff-lcs (1.3)
|
23
24
|
docile (1.3.2)
|
24
|
-
ffi (1.
|
25
|
+
ffi (1.13.0)
|
25
26
|
get_process_mem (0.2.5)
|
26
27
|
ffi (~> 1.0)
|
27
|
-
hashdiff (1.0.
|
28
|
-
httparty (0.
|
28
|
+
hashdiff (1.0.1)
|
29
|
+
httparty (0.18.1)
|
29
30
|
mime-types (~> 3.0)
|
30
31
|
multi_xml (>= 0.5.2)
|
31
|
-
jaro_winkler (1.5.4)
|
32
32
|
memory_profiler (0.9.14)
|
33
33
|
mime-types (3.3.1)
|
34
34
|
mime-types-data (~> 3.2015)
|
35
|
-
mime-types-data (3.
|
35
|
+
mime-types-data (3.2020.0512)
|
36
36
|
multi_json (1.14.1)
|
37
37
|
multi_xml (0.6.0)
|
38
38
|
mustermann (1.1.1)
|
39
39
|
ruby2_keywords (~> 0.0.1)
|
40
|
-
parallel (1.19.
|
41
|
-
parser (2.7.
|
42
|
-
ast (~> 2.4.
|
43
|
-
public_suffix (4.0.
|
44
|
-
rack (2.
|
40
|
+
parallel (1.19.2)
|
41
|
+
parser (2.7.1.4)
|
42
|
+
ast (~> 2.4.1)
|
43
|
+
public_suffix (4.0.5)
|
44
|
+
rack (2.2.3)
|
45
45
|
rack-protection (2.0.8.1)
|
46
46
|
rack
|
47
47
|
rainbow (3.0.0)
|
48
48
|
rake (13.0.1)
|
49
|
+
regexp_parser (1.7.1)
|
50
|
+
rexml (3.2.4)
|
49
51
|
rspec (3.9.0)
|
50
52
|
rspec-core (~> 3.9.0)
|
51
53
|
rspec-expectations (~> 3.9.0)
|
52
54
|
rspec-mocks (~> 3.9.0)
|
53
|
-
rspec-core (3.9.
|
54
|
-
rspec-support (~> 3.9.
|
55
|
-
rspec-expectations (3.9.
|
55
|
+
rspec-core (3.9.2)
|
56
|
+
rspec-support (~> 3.9.3)
|
57
|
+
rspec-expectations (3.9.2)
|
56
58
|
diff-lcs (>= 1.2.0, < 2.0)
|
57
59
|
rspec-support (~> 3.9.0)
|
58
60
|
rspec-mocks (3.9.1)
|
59
61
|
diff-lcs (>= 1.2.0, < 2.0)
|
60
62
|
rspec-support (~> 3.9.0)
|
61
|
-
rspec-support (3.9.
|
62
|
-
rubocop (0.
|
63
|
-
jaro_winkler (~> 1.5.1)
|
63
|
+
rspec-support (3.9.3)
|
64
|
+
rubocop (0.87.1)
|
64
65
|
parallel (~> 1.10)
|
65
|
-
parser (>= 2.7.
|
66
|
+
parser (>= 2.7.1.1)
|
66
67
|
rainbow (>= 2.2.2, < 4.0)
|
68
|
+
regexp_parser (>= 1.7)
|
69
|
+
rexml
|
70
|
+
rubocop-ast (>= 0.1.0, < 1.0)
|
67
71
|
ruby-progressbar (~> 1.7)
|
68
|
-
unicode-display_width (>= 1.4.0, <
|
72
|
+
unicode-display_width (>= 1.4.0, < 2.0)
|
73
|
+
rubocop-ast (0.1.0)
|
74
|
+
parser (>= 2.7.0.1)
|
69
75
|
ruby-progressbar (1.10.1)
|
70
76
|
ruby2_keywords (0.0.2)
|
71
77
|
safe_yaml (1.0.5)
|
72
|
-
simplecov (0.18.
|
78
|
+
simplecov (0.18.5)
|
73
79
|
docile (~> 1.1)
|
74
|
-
simplecov-html (~> 0.11
|
75
|
-
simplecov-html (0.
|
80
|
+
simplecov-html (~> 0.11)
|
81
|
+
simplecov-html (0.12.2)
|
76
82
|
sinatra (2.0.8.1)
|
77
83
|
mustermann (~> 1.0)
|
78
84
|
rack (~> 2.0)
|
@@ -85,10 +91,10 @@ GEM
|
|
85
91
|
rack-protection (= 2.0.8.1)
|
86
92
|
sinatra (= 2.0.8.1)
|
87
93
|
tilt (~> 2.0)
|
88
|
-
thor (0.
|
94
|
+
thor (1.0.1)
|
89
95
|
tilt (2.0.10)
|
90
|
-
unicode-display_width (1.
|
91
|
-
webmock (3.8.
|
96
|
+
unicode-display_width (1.7.0)
|
97
|
+
webmock (3.8.3)
|
92
98
|
addressable (>= 2.3.6)
|
93
99
|
crack (>= 0.3.2)
|
94
100
|
hashdiff (>= 0.4.0, < 2.0.0)
|
@@ -102,11 +108,12 @@ DEPENDENCIES
|
|
102
108
|
bundler-audit
|
103
109
|
bundler-gem_version_tasks
|
104
110
|
byebug
|
111
|
+
charlock_holmes
|
105
112
|
get_process_mem
|
106
113
|
memory_profiler
|
107
114
|
rake (~> 13.0)
|
108
115
|
rspec (~> 3.0)
|
109
|
-
rubocop (~> 0.
|
116
|
+
rubocop (~> 0.87.1)
|
110
117
|
simplecov (~> 0.17)
|
111
118
|
sinatra (~> 2.0)
|
112
119
|
sinatra-contrib (~> 2.0)
|
data/README.md
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# StreamLines
|
2
2
|
|
3
3
|
[![Gem Version](https://badge.fury.io/rb/stream_lines.svg)](https://badge.fury.io/rb/stream_lines)
|
4
|
-
|
4
|
+
![](https://github.com/jdlubrano/stream_lines/workflows/Tests/badge.svg)
|
5
5
|
|
6
6
|
An API for streaming files from remote locations one line at a time.
|
7
7
|
|
@@ -119,10 +119,10 @@ To install this gem onto your local machine, run `bundle exec rake install`.
|
|
119
119
|
|
120
120
|
## Releasing
|
121
121
|
|
122
|
-
After merging in the new functionality to the master branch:
|
122
|
+
After merging in the new functionality to the main branch:
|
123
123
|
|
124
124
|
```
|
125
|
-
git checkout master
|
125
|
+
git checkout main
|
126
126
|
git pull --prune
|
127
127
|
bundle exec rake version:bump:<major, minor, or patch>
|
128
128
|
bundle exec rubocop -a
|
@@ -135,7 +135,7 @@ bundle exec rake release
|
|
135
135
|
Bug reports and pull requests are welcome on GitHub at
|
136
136
|
https://github.com/jdlubrano/stream_lines. This project is intended to be a
|
137
137
|
safe, welcoming space for collaboration, and contributors are expected to
|
138
|
-
adhere to the [code of conduct](https://github.com/jdlubrano/stream_lines/blob/master/CODE_OF_CONDUCT.md).
|
138
|
+
adhere to the [code of conduct](https://github.com/jdlubrano/stream_lines/blob/main/CODE_OF_CONDUCT.md).
|
139
139
|
|
140
140
|
## License
|
141
141
|
|
data/bin/console
CHANGED
@@ -3,7 +3,9 @@
|
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
require 'bundler/setup'
|
6
|
+
require 'byebug'
|
6
7
|
require 'stream_lines'
|
8
|
+
require 'charlock_holmes/string'
|
7
9
|
|
8
10
|
# You can add fixtures and/or initialization code here to make experimenting
|
9
11
|
# with your gem easier. You can also use a different console, if you like.
|
@@ -23,7 +23,9 @@ module StreamLines
|
|
23
23
|
def initialize(url, **csv_options)
|
24
24
|
@url = url
|
25
25
|
@csv_options = accepted_csv_options(csv_options)
|
26
|
-
|
26
|
+
|
27
|
+
encoding = @csv_options[:encoding] || Encoding.default_external
|
28
|
+
@stream = Stream.new(url, encoding: encoding)
|
27
29
|
end
|
28
30
|
|
29
31
|
def each(&block)
|
@@ -8,10 +8,10 @@ module StreamLines
|
|
8
8
|
class JSONLines
|
9
9
|
include Enumerable
|
10
10
|
|
11
|
-
def initialize(url, **json_options)
|
11
|
+
def initialize(url, encoding: Encoding.default_external, **json_options)
|
12
12
|
@url = url
|
13
13
|
@json_options = json_options
|
14
|
-
@stream = Stream.new(url)
|
14
|
+
@stream = Stream.new(url, encoding: encoding)
|
15
15
|
end
|
16
16
|
|
17
17
|
def each(&block)
|
@@ -12,9 +12,10 @@ module StreamLines
|
|
12
12
|
|
13
13
|
raise_on 400..599
|
14
14
|
|
15
|
-
def initialize(url)
|
15
|
+
def initialize(url, encoding: Encoding.default_external)
|
16
16
|
@url = url
|
17
|
-
@
|
17
|
+
@encoding = encoding
|
18
|
+
@buffer = String.new(encoding: @encoding)
|
18
19
|
end
|
19
20
|
|
20
21
|
def each(&block)
|
@@ -33,22 +34,36 @@ module StreamLines
|
|
33
34
|
lines.each { |line| block.call(line) }
|
34
35
|
end
|
35
36
|
|
36
|
-
@buffer.
|
37
|
-
block.call(@buffer.read) if @buffer.size.positive?
|
37
|
+
block.call(@buffer) if @buffer.size.positive?
|
38
38
|
end
|
39
39
|
|
40
40
|
def extract_lines(chunk)
|
41
|
-
|
41
|
+
encoded_chunk = @buffer + chunk.to_s.dup.force_encoding(@encoding)
|
42
|
+
lines = split_lines(encoded_chunk)
|
43
|
+
@buffer = String.new(encoding: @encoding)
|
44
|
+
@buffer << lines.pop.to_s
|
42
45
|
|
43
|
-
if lines.length > 1
|
44
|
-
@buffer.rewind
|
45
|
-
lines.first.prepend(@buffer.read)
|
46
|
-
@buffer = StringIO.new
|
47
|
-
end
|
48
|
-
|
49
|
-
@buffer << lines.pop
|
50
46
|
lines
|
51
47
|
end
|
48
|
+
|
49
|
+
def split_lines(encoded_chunk)
|
50
|
+
encoded_chunk.split($INPUT_RECORD_SEPARATOR, -1)
|
51
|
+
rescue ArgumentError => e
|
52
|
+
raise e unless /invalid byte sequence/.match?(e.message)
|
53
|
+
|
54
|
+
# NOTE: (jdlubrano)
|
55
|
+
# The last byte in the chunk is most likely a part of a multibyte
|
56
|
+
# character that, on its own, is an invalid byte sequence. So, we
|
57
|
+
# want to split the lines containing all valid bytes and make the
|
58
|
+
# trailing bytes the last line. The last line eventually gets added
|
59
|
+
# to the buffer, prepended to the next chunk, and, hopefully, restores
|
60
|
+
# a valid byte sequence.
|
61
|
+
last_newline_index = encoded_chunk.rindex($INPUT_RECORD_SEPARATOR)
|
62
|
+
return [encoded_chunk] if last_newline_index.nil?
|
63
|
+
|
64
|
+
valid_lines = encoded_chunk[0...last_newline_index].split($INPUT_RECORD_SEPARATOR, -1)
|
65
|
+
valid_lines + [encoded_chunk[(last_newline_index + 1)..-1]].compact
|
66
|
+
end
|
52
67
|
end
|
53
68
|
end
|
54
69
|
end
|
data/lib/stream_lines/version.rb
CHANGED
data/stream_lines.gemspec
CHANGED
@@ -33,11 +33,12 @@ Gem::Specification.new do |spec|
|
|
33
33
|
spec.add_development_dependency 'bundler-audit'
|
34
34
|
spec.add_development_dependency 'bundler-gem_version_tasks'
|
35
35
|
spec.add_development_dependency 'byebug'
|
36
|
+
spec.add_development_dependency 'charlock_holmes'
|
36
37
|
spec.add_development_dependency 'get_process_mem'
|
37
38
|
spec.add_development_dependency 'memory_profiler'
|
38
39
|
spec.add_development_dependency 'rake', '~> 13.0'
|
39
40
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
40
|
-
spec.add_development_dependency 'rubocop', '~> 0.
|
41
|
+
spec.add_development_dependency 'rubocop', '~> 0.87.1'
|
41
42
|
spec.add_development_dependency 'simplecov', '~> 0.17'
|
42
43
|
spec.add_development_dependency 'sinatra', '~> 2.0'
|
43
44
|
spec.add_development_dependency 'sinatra-contrib', '~> 2.0'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: stream_lines
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Joel Lubrano
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-07-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: httparty
|
@@ -94,6 +94,20 @@ dependencies:
|
|
94
94
|
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: charlock_holmes
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
97
111
|
- !ruby/object:Gem::Dependency
|
98
112
|
name: get_process_mem
|
99
113
|
requirement: !ruby/object:Gem::Requirement
|
@@ -156,14 +170,14 @@ dependencies:
|
|
156
170
|
requirements:
|
157
171
|
- - "~>"
|
158
172
|
- !ruby/object:Gem::Version
|
159
|
-
version: 0.
|
173
|
+
version: 0.87.1
|
160
174
|
type: :development
|
161
175
|
prerelease: false
|
162
176
|
version_requirements: !ruby/object:Gem::Requirement
|
163
177
|
requirements:
|
164
178
|
- - "~>"
|
165
179
|
- !ruby/object:Gem::Version
|
166
|
-
version: 0.
|
180
|
+
version: 0.87.1
|
167
181
|
- !ruby/object:Gem::Dependency
|
168
182
|
name: simplecov
|
169
183
|
requirement: !ruby/object:Gem::Requirement
|
@@ -227,12 +241,12 @@ executables: []
|
|
227
241
|
extensions: []
|
228
242
|
extra_rdoc_files: []
|
229
243
|
files:
|
244
|
+
- ".github/workflows/ci-tests.yml"
|
230
245
|
- ".gitignore"
|
231
246
|
- ".rspec"
|
232
247
|
- ".rubocop.yml"
|
233
248
|
- ".ruby-gemset"
|
234
249
|
- ".ruby-version"
|
235
|
-
- ".travis.yml"
|
236
250
|
- CODE_OF_CONDUCT.md
|
237
251
|
- Gemfile
|
238
252
|
- Gemfile.lock
|
data/.travis.yml
DELETED
@@ -1,15 +0,0 @@
|
|
1
|
-
---
|
2
|
-
language: ruby
|
3
|
-
cache: bundler
|
4
|
-
rvm:
|
5
|
-
- 2.7.0
|
6
|
-
- 2.6.0
|
7
|
-
- 2.5.0
|
8
|
-
before_install: gem install bundler -v 2.0.2
|
9
|
-
script:
|
10
|
-
- bundle exec bundle-audit check --update
|
11
|
-
- bundle exec rubocop
|
12
|
-
- bundle exec rspec
|
13
|
-
branches:
|
14
|
-
only:
|
15
|
-
- master
|