stream_lines 0.3.1 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 222d5f5ba77ac40c17fe4faf3ba9aab8d611e042e432ecbb2b6328c63b3856fe
4
- data.tar.gz: 79664c06171bfcc6c253f01c912aa7862193f875e4dc2b8002f52f55c017b994
3
+ metadata.gz: 9e832d9790c71431c43722f116cca885d3fee985001b12f5d0c53cc17e78c7b8
4
+ data.tar.gz: 76338bc7e203bfa59f3aed2469db3694951dfd5905c6bcfe416b08f1512f3289
5
5
  SHA512:
6
- metadata.gz: dff1372c1048260c71d11fbab457ae9bd45723e52bf0485b141f8cc8885687d9cafe09d7a241d957a4f66ff6e8644fd65562a102b82ef73244607089e6774517
7
- data.tar.gz: 6c22a6aa984ac5c216e1ecfc290b79f4fbb2f330bf6327497d50cf71a3d0ef527b518c18efb319c542b89297b77c22fe7fe4bf067094dda9f6d1efe8bc1e4053
6
+ metadata.gz: 3d980bdddd41227418da10447e57599ee94cea52b7ea926e1cb3f0efd8a3b71b7e8f44c70ed7a62dc37ea31ac97e84a1f5d20677ecb933bd26be7c2348e533c5
7
+ data.tar.gz: 721610a59f59862772b0ea259b6f22c0e84f058d458538ae993478324e9e315dc6fa0b469eaebb56fd630f59bd43b3bb43956f2111855c22cb00bb744296018c
@@ -0,0 +1,30 @@
1
+ # This workflow uses actions that are not certified by GitHub.
2
+ # They are provided by a third-party and are governed by
3
+ # separate terms of service, privacy policy, and support
4
+ # documentation.
5
+ # This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake
6
+ # For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby
7
+
8
+ name: Tests
9
+
10
+ on:
11
+ push:
12
+ branches:
13
+ - main
14
+ pull_request:
15
+ branches:
16
+ - '*'
17
+
18
+ jobs:
19
+ test:
20
+ runs-on: ubuntu-latest
21
+ steps:
22
+ - uses: actions/checkout@v2
23
+ - name: Set up Ruby
24
+ uses: ruby/setup-ruby@v1
25
+ with:
26
+ ruby-version: 2.6
27
+ - name: Install dependencies
28
+ run: bundle install
29
+ - name: Run tests
30
+ run: bundle exec rake
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- stream_lines (0.3.1)
4
+ stream_lines (0.4.0)
5
5
  httparty (~> 0.14)
6
6
 
7
7
  GEM
@@ -9,70 +9,76 @@ GEM
9
9
  specs:
10
10
  addressable (2.7.0)
11
11
  public_suffix (>= 2.0.2, < 5.0)
12
- ast (2.4.0)
12
+ ast (2.4.1)
13
13
  awesome_print (1.8.0)
14
- backports (3.15.0)
15
- bundler-audit (0.6.1)
14
+ backports (3.17.2)
15
+ bundler-audit (0.7.0.1)
16
16
  bundler (>= 1.2.0, < 3)
17
- thor (~> 0.18)
17
+ thor (>= 0.18, < 2)
18
18
  bundler-gem_version_tasks (0.2.1)
19
- byebug (11.1.1)
19
+ byebug (11.1.3)
20
+ charlock_holmes (0.7.7)
20
21
  crack (0.4.3)
21
22
  safe_yaml (~> 1.0.0)
22
23
  diff-lcs (1.3)
23
24
  docile (1.3.2)
24
- ffi (1.11.3)
25
+ ffi (1.13.0)
25
26
  get_process_mem (0.2.5)
26
27
  ffi (~> 1.0)
27
- hashdiff (1.0.0)
28
- httparty (0.17.3)
28
+ hashdiff (1.0.1)
29
+ httparty (0.18.1)
29
30
  mime-types (~> 3.0)
30
31
  multi_xml (>= 0.5.2)
31
- jaro_winkler (1.5.4)
32
32
  memory_profiler (0.9.14)
33
33
  mime-types (3.3.1)
34
34
  mime-types-data (~> 3.2015)
35
- mime-types-data (3.2019.1009)
35
+ mime-types-data (3.2020.0512)
36
36
  multi_json (1.14.1)
37
37
  multi_xml (0.6.0)
38
38
  mustermann (1.1.1)
39
39
  ruby2_keywords (~> 0.0.1)
40
- parallel (1.19.1)
41
- parser (2.7.0.2)
42
- ast (~> 2.4.0)
43
- public_suffix (4.0.3)
44
- rack (2.0.8)
40
+ parallel (1.19.2)
41
+ parser (2.7.1.4)
42
+ ast (~> 2.4.1)
43
+ public_suffix (4.0.5)
44
+ rack (2.2.3)
45
45
  rack-protection (2.0.8.1)
46
46
  rack
47
47
  rainbow (3.0.0)
48
48
  rake (13.0.1)
49
+ regexp_parser (1.7.1)
50
+ rexml (3.2.4)
49
51
  rspec (3.9.0)
50
52
  rspec-core (~> 3.9.0)
51
53
  rspec-expectations (~> 3.9.0)
52
54
  rspec-mocks (~> 3.9.0)
53
- rspec-core (3.9.1)
54
- rspec-support (~> 3.9.1)
55
- rspec-expectations (3.9.0)
55
+ rspec-core (3.9.2)
56
+ rspec-support (~> 3.9.3)
57
+ rspec-expectations (3.9.2)
56
58
  diff-lcs (>= 1.2.0, < 2.0)
57
59
  rspec-support (~> 3.9.0)
58
60
  rspec-mocks (3.9.1)
59
61
  diff-lcs (>= 1.2.0, < 2.0)
60
62
  rspec-support (~> 3.9.0)
61
- rspec-support (3.9.2)
62
- rubocop (0.79.0)
63
- jaro_winkler (~> 1.5.1)
63
+ rspec-support (3.9.3)
64
+ rubocop (0.87.1)
64
65
  parallel (~> 1.10)
65
- parser (>= 2.7.0.1)
66
+ parser (>= 2.7.1.1)
66
67
  rainbow (>= 2.2.2, < 4.0)
68
+ regexp_parser (>= 1.7)
69
+ rexml
70
+ rubocop-ast (>= 0.1.0, < 1.0)
67
71
  ruby-progressbar (~> 1.7)
68
- unicode-display_width (>= 1.4.0, < 1.7)
72
+ unicode-display_width (>= 1.4.0, < 2.0)
73
+ rubocop-ast (0.1.0)
74
+ parser (>= 2.7.0.1)
69
75
  ruby-progressbar (1.10.1)
70
76
  ruby2_keywords (0.0.2)
71
77
  safe_yaml (1.0.5)
72
- simplecov (0.18.1)
78
+ simplecov (0.18.5)
73
79
  docile (~> 1.1)
74
- simplecov-html (~> 0.11.0)
75
- simplecov-html (0.11.0)
80
+ simplecov-html (~> 0.11)
81
+ simplecov-html (0.12.2)
76
82
  sinatra (2.0.8.1)
77
83
  mustermann (~> 1.0)
78
84
  rack (~> 2.0)
@@ -85,10 +91,10 @@ GEM
85
91
  rack-protection (= 2.0.8.1)
86
92
  sinatra (= 2.0.8.1)
87
93
  tilt (~> 2.0)
88
- thor (0.20.3)
94
+ thor (1.0.1)
89
95
  tilt (2.0.10)
90
- unicode-display_width (1.6.0)
91
- webmock (3.8.1)
96
+ unicode-display_width (1.7.0)
97
+ webmock (3.8.3)
92
98
  addressable (>= 2.3.6)
93
99
  crack (>= 0.3.2)
94
100
  hashdiff (>= 0.4.0, < 2.0.0)
@@ -102,11 +108,12 @@ DEPENDENCIES
102
108
  bundler-audit
103
109
  bundler-gem_version_tasks
104
110
  byebug
111
+ charlock_holmes
105
112
  get_process_mem
106
113
  memory_profiler
107
114
  rake (~> 13.0)
108
115
  rspec (~> 3.0)
109
- rubocop (~> 0.79.0)
116
+ rubocop (~> 0.87.1)
110
117
  simplecov (~> 0.17)
111
118
  sinatra (~> 2.0)
112
119
  sinatra-contrib (~> 2.0)
data/README.md CHANGED
@@ -1,7 +1,7 @@
1
1
  # StreamLines
2
2
 
3
3
  [![Gem Version](https://badge.fury.io/rb/stream_lines.svg)](https://badge.fury.io/rb/stream_lines)
4
- [![Build Status](https://travis-ci.com/jdlubrano/stream_lines.svg?branch=master)](https://travis-ci.com/jdlubrano/stream_lines)
4
+ ![](https://github.com/jdlubrano/stream_lines/workflows/Tests/badge.svg)
5
5
 
6
6
  An API for streaming files from remote locations one line at a time.
7
7
 
@@ -119,10 +119,10 @@ To install this gem onto your local machine, run `bundle exec rake install`.
119
119
 
120
120
  ## Releasing
121
121
 
122
- After merging in the new functionality to the master branch:
122
+ After merging in the new functionality to the main branch:
123
123
 
124
124
  ```
125
- git checkout master
125
+ git checkout main
126
126
  git pull --prune
127
127
  bundle exec rake version:bump:<major, minor, or patch>
128
128
  bundle exec rubocop -a
@@ -135,7 +135,7 @@ bundle exec rake release
135
135
  Bug reports and pull requests are welcome on GitHub at
136
136
  https://github.com/jdlubrano/stream_lines. This project is intended to be a
137
137
  safe, welcoming space for collaboration, and contributors are expected to
138
- adhere to the [code of conduct](https://github.com/jdlubrano/stream_lines/blob/master/CODE_OF_CONDUCT.md).
138
+ adhere to the [code of conduct](https://github.com/jdlubrano/stream_lines/blob/main/CODE_OF_CONDUCT.md).
139
139
 
140
140
  ## License
141
141
 
@@ -3,7 +3,9 @@
3
3
  # frozen_string_literal: true
4
4
 
5
5
  require 'bundler/setup'
6
+ require 'byebug'
6
7
  require 'stream_lines'
8
+ require 'charlock_holmes/string'
7
9
 
8
10
  # You can add fixtures and/or initialization code here to make experimenting
9
11
  # with your gem easier. You can also use a different console, if you like.
@@ -23,7 +23,9 @@ module StreamLines
23
23
  def initialize(url, **csv_options)
24
24
  @url = url
25
25
  @csv_options = accepted_csv_options(csv_options)
26
- @stream = Stream.new(url)
26
+
27
+ encoding = @csv_options[:encoding] || Encoding.default_external
28
+ @stream = Stream.new(url, encoding: encoding)
27
29
  end
28
30
 
29
31
  def each(&block)
@@ -8,10 +8,10 @@ module StreamLines
8
8
  class JSONLines
9
9
  include Enumerable
10
10
 
11
- def initialize(url, **json_options)
11
+ def initialize(url, encoding: Encoding.default_external, **json_options)
12
12
  @url = url
13
13
  @json_options = json_options
14
- @stream = Stream.new(url)
14
+ @stream = Stream.new(url, encoding: encoding)
15
15
  end
16
16
 
17
17
  def each(&block)
@@ -12,9 +12,10 @@ module StreamLines
12
12
 
13
13
  raise_on 400..599
14
14
 
15
- def initialize(url)
15
+ def initialize(url, encoding: Encoding.default_external)
16
16
  @url = url
17
- @buffer = StringIO.new
17
+ @encoding = encoding
18
+ @buffer = String.new(encoding: @encoding)
18
19
  end
19
20
 
20
21
  def each(&block)
@@ -33,22 +34,36 @@ module StreamLines
33
34
  lines.each { |line| block.call(line) }
34
35
  end
35
36
 
36
- @buffer.rewind
37
- block.call(@buffer.read) if @buffer.size.positive?
37
+ block.call(@buffer) if @buffer.size.positive?
38
38
  end
39
39
 
40
40
  def extract_lines(chunk)
41
- lines = chunk.split($INPUT_RECORD_SEPARATOR, -1)
41
+ encoded_chunk = @buffer + chunk.to_s.dup.force_encoding(@encoding)
42
+ lines = split_lines(encoded_chunk)
43
+ @buffer = String.new(encoding: @encoding)
44
+ @buffer << lines.pop.to_s
42
45
 
43
- if lines.length > 1
44
- @buffer.rewind
45
- lines.first.prepend(@buffer.read)
46
- @buffer = StringIO.new
47
- end
48
-
49
- @buffer << lines.pop
50
46
  lines
51
47
  end
48
+
49
+ def split_lines(encoded_chunk)
50
+ encoded_chunk.split($INPUT_RECORD_SEPARATOR, -1)
51
+ rescue ArgumentError => e
52
+ raise e unless /invalid byte sequence/.match?(e.message)
53
+
54
+ # NOTE: (jdlubrano)
55
+ # The last byte in the chunk is most likely a part of a multibyte
56
+ # character that, on its own, is an invalid byte sequence. So, we
57
+ # want to split the lines containing all valid bytes and make the
58
+ # trailing bytes the last line. The last line eventually gets added
59
+ # to the buffer, prepended to the next chunk, and, hopefully, restores
60
+ # a valid byte sequence.
61
+ last_newline_index = encoded_chunk.rindex($INPUT_RECORD_SEPARATOR)
62
+ return [encoded_chunk] if last_newline_index.nil?
63
+
64
+ valid_lines = encoded_chunk[0...last_newline_index].split($INPUT_RECORD_SEPARATOR, -1)
65
+ valid_lines + [encoded_chunk[(last_newline_index + 1)..-1]].compact
66
+ end
52
67
  end
53
68
  end
54
69
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module StreamLines
4
- VERSION = '0.3.1'
4
+ VERSION = '0.4.0'
5
5
  end
@@ -33,11 +33,12 @@ Gem::Specification.new do |spec|
33
33
  spec.add_development_dependency 'bundler-audit'
34
34
  spec.add_development_dependency 'bundler-gem_version_tasks'
35
35
  spec.add_development_dependency 'byebug'
36
+ spec.add_development_dependency 'charlock_holmes'
36
37
  spec.add_development_dependency 'get_process_mem'
37
38
  spec.add_development_dependency 'memory_profiler'
38
39
  spec.add_development_dependency 'rake', '~> 13.0'
39
40
  spec.add_development_dependency 'rspec', '~> 3.0'
40
- spec.add_development_dependency 'rubocop', '~> 0.79.0'
41
+ spec.add_development_dependency 'rubocop', '~> 0.87.1'
41
42
  spec.add_development_dependency 'simplecov', '~> 0.17'
42
43
  spec.add_development_dependency 'sinatra', '~> 2.0'
43
44
  spec.add_development_dependency 'sinatra-contrib', '~> 2.0'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: stream_lines
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joel Lubrano
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-03-04 00:00:00.000000000 Z
11
+ date: 2020-07-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: httparty
@@ -94,6 +94,20 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: charlock_holmes
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
97
111
  - !ruby/object:Gem::Dependency
98
112
  name: get_process_mem
99
113
  requirement: !ruby/object:Gem::Requirement
@@ -156,14 +170,14 @@ dependencies:
156
170
  requirements:
157
171
  - - "~>"
158
172
  - !ruby/object:Gem::Version
159
- version: 0.79.0
173
+ version: 0.87.1
160
174
  type: :development
161
175
  prerelease: false
162
176
  version_requirements: !ruby/object:Gem::Requirement
163
177
  requirements:
164
178
  - - "~>"
165
179
  - !ruby/object:Gem::Version
166
- version: 0.79.0
180
+ version: 0.87.1
167
181
  - !ruby/object:Gem::Dependency
168
182
  name: simplecov
169
183
  requirement: !ruby/object:Gem::Requirement
@@ -227,12 +241,12 @@ executables: []
227
241
  extensions: []
228
242
  extra_rdoc_files: []
229
243
  files:
244
+ - ".github/workflows/ci-tests.yml"
230
245
  - ".gitignore"
231
246
  - ".rspec"
232
247
  - ".rubocop.yml"
233
248
  - ".ruby-gemset"
234
249
  - ".ruby-version"
235
- - ".travis.yml"
236
250
  - CODE_OF_CONDUCT.md
237
251
  - Gemfile
238
252
  - Gemfile.lock
@@ -1,15 +0,0 @@
1
- ---
2
- language: ruby
3
- cache: bundler
4
- rvm:
5
- - 2.7.0
6
- - 2.6.0
7
- - 2.5.0
8
- before_install: gem install bundler -v 2.0.2
9
- script:
10
- - bundle exec bundle-audit check --update
11
- - bundle exec rubocop
12
- - bundle exec rspec
13
- branches:
14
- only:
15
- - master