stream_lines 0.3.1 → 0.4.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 222d5f5ba77ac40c17fe4faf3ba9aab8d611e042e432ecbb2b6328c63b3856fe
4
- data.tar.gz: 79664c06171bfcc6c253f01c912aa7862193f875e4dc2b8002f52f55c017b994
3
+ metadata.gz: 9e832d9790c71431c43722f116cca885d3fee985001b12f5d0c53cc17e78c7b8
4
+ data.tar.gz: 76338bc7e203bfa59f3aed2469db3694951dfd5905c6bcfe416b08f1512f3289
5
5
  SHA512:
6
- metadata.gz: dff1372c1048260c71d11fbab457ae9bd45723e52bf0485b141f8cc8885687d9cafe09d7a241d957a4f66ff6e8644fd65562a102b82ef73244607089e6774517
7
- data.tar.gz: 6c22a6aa984ac5c216e1ecfc290b79f4fbb2f330bf6327497d50cf71a3d0ef527b518c18efb319c542b89297b77c22fe7fe4bf067094dda9f6d1efe8bc1e4053
6
+ metadata.gz: 3d980bdddd41227418da10447e57599ee94cea52b7ea926e1cb3f0efd8a3b71b7e8f44c70ed7a62dc37ea31ac97e84a1f5d20677ecb933bd26be7c2348e533c5
7
+ data.tar.gz: 721610a59f59862772b0ea259b6f22c0e84f058d458538ae993478324e9e315dc6fa0b469eaebb56fd630f59bd43b3bb43956f2111855c22cb00bb744296018c
@@ -0,0 +1,30 @@
1
+ # This workflow uses actions that are not certified by GitHub.
2
+ # They are provided by a third-party and are governed by
3
+ # separate terms of service, privacy policy, and support
4
+ # documentation.
5
+ # This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake
6
+ # For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby
7
+
8
+ name: Tests
9
+
10
+ on:
11
+ push:
12
+ branches:
13
+ - main
14
+ pull_request:
15
+ branches:
16
+ - '*'
17
+
18
+ jobs:
19
+ test:
20
+ runs-on: ubuntu-latest
21
+ steps:
22
+ - uses: actions/checkout@v2
23
+ - name: Set up Ruby
24
+ uses: ruby/setup-ruby@v1
25
+ with:
26
+ ruby-version: 2.6
27
+ - name: Install dependencies
28
+ run: bundle install
29
+ - name: Run tests
30
+ run: bundle exec rake
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- stream_lines (0.3.1)
4
+ stream_lines (0.4.0)
5
5
  httparty (~> 0.14)
6
6
 
7
7
  GEM
@@ -9,70 +9,76 @@ GEM
9
9
  specs:
10
10
  addressable (2.7.0)
11
11
  public_suffix (>= 2.0.2, < 5.0)
12
- ast (2.4.0)
12
+ ast (2.4.1)
13
13
  awesome_print (1.8.0)
14
- backports (3.15.0)
15
- bundler-audit (0.6.1)
14
+ backports (3.17.2)
15
+ bundler-audit (0.7.0.1)
16
16
  bundler (>= 1.2.0, < 3)
17
- thor (~> 0.18)
17
+ thor (>= 0.18, < 2)
18
18
  bundler-gem_version_tasks (0.2.1)
19
- byebug (11.1.1)
19
+ byebug (11.1.3)
20
+ charlock_holmes (0.7.7)
20
21
  crack (0.4.3)
21
22
  safe_yaml (~> 1.0.0)
22
23
  diff-lcs (1.3)
23
24
  docile (1.3.2)
24
- ffi (1.11.3)
25
+ ffi (1.13.0)
25
26
  get_process_mem (0.2.5)
26
27
  ffi (~> 1.0)
27
- hashdiff (1.0.0)
28
- httparty (0.17.3)
28
+ hashdiff (1.0.1)
29
+ httparty (0.18.1)
29
30
  mime-types (~> 3.0)
30
31
  multi_xml (>= 0.5.2)
31
- jaro_winkler (1.5.4)
32
32
  memory_profiler (0.9.14)
33
33
  mime-types (3.3.1)
34
34
  mime-types-data (~> 3.2015)
35
- mime-types-data (3.2019.1009)
35
+ mime-types-data (3.2020.0512)
36
36
  multi_json (1.14.1)
37
37
  multi_xml (0.6.0)
38
38
  mustermann (1.1.1)
39
39
  ruby2_keywords (~> 0.0.1)
40
- parallel (1.19.1)
41
- parser (2.7.0.2)
42
- ast (~> 2.4.0)
43
- public_suffix (4.0.3)
44
- rack (2.0.8)
40
+ parallel (1.19.2)
41
+ parser (2.7.1.4)
42
+ ast (~> 2.4.1)
43
+ public_suffix (4.0.5)
44
+ rack (2.2.3)
45
45
  rack-protection (2.0.8.1)
46
46
  rack
47
47
  rainbow (3.0.0)
48
48
  rake (13.0.1)
49
+ regexp_parser (1.7.1)
50
+ rexml (3.2.4)
49
51
  rspec (3.9.0)
50
52
  rspec-core (~> 3.9.0)
51
53
  rspec-expectations (~> 3.9.0)
52
54
  rspec-mocks (~> 3.9.0)
53
- rspec-core (3.9.1)
54
- rspec-support (~> 3.9.1)
55
- rspec-expectations (3.9.0)
55
+ rspec-core (3.9.2)
56
+ rspec-support (~> 3.9.3)
57
+ rspec-expectations (3.9.2)
56
58
  diff-lcs (>= 1.2.0, < 2.0)
57
59
  rspec-support (~> 3.9.0)
58
60
  rspec-mocks (3.9.1)
59
61
  diff-lcs (>= 1.2.0, < 2.0)
60
62
  rspec-support (~> 3.9.0)
61
- rspec-support (3.9.2)
62
- rubocop (0.79.0)
63
- jaro_winkler (~> 1.5.1)
63
+ rspec-support (3.9.3)
64
+ rubocop (0.87.1)
64
65
  parallel (~> 1.10)
65
- parser (>= 2.7.0.1)
66
+ parser (>= 2.7.1.1)
66
67
  rainbow (>= 2.2.2, < 4.0)
68
+ regexp_parser (>= 1.7)
69
+ rexml
70
+ rubocop-ast (>= 0.1.0, < 1.0)
67
71
  ruby-progressbar (~> 1.7)
68
- unicode-display_width (>= 1.4.0, < 1.7)
72
+ unicode-display_width (>= 1.4.0, < 2.0)
73
+ rubocop-ast (0.1.0)
74
+ parser (>= 2.7.0.1)
69
75
  ruby-progressbar (1.10.1)
70
76
  ruby2_keywords (0.0.2)
71
77
  safe_yaml (1.0.5)
72
- simplecov (0.18.1)
78
+ simplecov (0.18.5)
73
79
  docile (~> 1.1)
74
- simplecov-html (~> 0.11.0)
75
- simplecov-html (0.11.0)
80
+ simplecov-html (~> 0.11)
81
+ simplecov-html (0.12.2)
76
82
  sinatra (2.0.8.1)
77
83
  mustermann (~> 1.0)
78
84
  rack (~> 2.0)
@@ -85,10 +91,10 @@ GEM
85
91
  rack-protection (= 2.0.8.1)
86
92
  sinatra (= 2.0.8.1)
87
93
  tilt (~> 2.0)
88
- thor (0.20.3)
94
+ thor (1.0.1)
89
95
  tilt (2.0.10)
90
- unicode-display_width (1.6.0)
91
- webmock (3.8.1)
96
+ unicode-display_width (1.7.0)
97
+ webmock (3.8.3)
92
98
  addressable (>= 2.3.6)
93
99
  crack (>= 0.3.2)
94
100
  hashdiff (>= 0.4.0, < 2.0.0)
@@ -102,11 +108,12 @@ DEPENDENCIES
102
108
  bundler-audit
103
109
  bundler-gem_version_tasks
104
110
  byebug
111
+ charlock_holmes
105
112
  get_process_mem
106
113
  memory_profiler
107
114
  rake (~> 13.0)
108
115
  rspec (~> 3.0)
109
- rubocop (~> 0.79.0)
116
+ rubocop (~> 0.87.1)
110
117
  simplecov (~> 0.17)
111
118
  sinatra (~> 2.0)
112
119
  sinatra-contrib (~> 2.0)
data/README.md CHANGED
@@ -1,7 +1,7 @@
1
1
  # StreamLines
2
2
 
3
3
  [![Gem Version](https://badge.fury.io/rb/stream_lines.svg)](https://badge.fury.io/rb/stream_lines)
4
- [![Build Status](https://travis-ci.com/jdlubrano/stream_lines.svg?branch=master)](https://travis-ci.com/jdlubrano/stream_lines)
4
+ ![](https://github.com/jdlubrano/stream_lines/workflows/Tests/badge.svg)
5
5
 
6
6
  An API for streaming files from remote locations one line at a time.
7
7
 
@@ -119,10 +119,10 @@ To install this gem onto your local machine, run `bundle exec rake install`.
119
119
 
120
120
  ## Releasing
121
121
 
122
- After merging in the new functionality to the master branch:
122
+ After merging in the new functionality to the main branch:
123
123
 
124
124
  ```
125
- git checkout master
125
+ git checkout main
126
126
  git pull --prune
127
127
  bundle exec rake version:bump:<major, minor, or patch>
128
128
  bundle exec rubocop -a
@@ -135,7 +135,7 @@ bundle exec rake release
135
135
  Bug reports and pull requests are welcome on GitHub at
136
136
  https://github.com/jdlubrano/stream_lines. This project is intended to be a
137
137
  safe, welcoming space for collaboration, and contributors are expected to
138
- adhere to the [code of conduct](https://github.com/jdlubrano/stream_lines/blob/master/CODE_OF_CONDUCT.md).
138
+ adhere to the [code of conduct](https://github.com/jdlubrano/stream_lines/blob/main/CODE_OF_CONDUCT.md).
139
139
 
140
140
  ## License
141
141
 
@@ -3,7 +3,9 @@
3
3
  # frozen_string_literal: true
4
4
 
5
5
  require 'bundler/setup'
6
+ require 'byebug'
6
7
  require 'stream_lines'
8
+ require 'charlock_holmes/string'
7
9
 
8
10
  # You can add fixtures and/or initialization code here to make experimenting
9
11
  # with your gem easier. You can also use a different console, if you like.
@@ -23,7 +23,9 @@ module StreamLines
23
23
  def initialize(url, **csv_options)
24
24
  @url = url
25
25
  @csv_options = accepted_csv_options(csv_options)
26
- @stream = Stream.new(url)
26
+
27
+ encoding = @csv_options[:encoding] || Encoding.default_external
28
+ @stream = Stream.new(url, encoding: encoding)
27
29
  end
28
30
 
29
31
  def each(&block)
@@ -8,10 +8,10 @@ module StreamLines
8
8
  class JSONLines
9
9
  include Enumerable
10
10
 
11
- def initialize(url, **json_options)
11
+ def initialize(url, encoding: Encoding.default_external, **json_options)
12
12
  @url = url
13
13
  @json_options = json_options
14
- @stream = Stream.new(url)
14
+ @stream = Stream.new(url, encoding: encoding)
15
15
  end
16
16
 
17
17
  def each(&block)
@@ -12,9 +12,10 @@ module StreamLines
12
12
 
13
13
  raise_on 400..599
14
14
 
15
- def initialize(url)
15
+ def initialize(url, encoding: Encoding.default_external)
16
16
  @url = url
17
- @buffer = StringIO.new
17
+ @encoding = encoding
18
+ @buffer = String.new(encoding: @encoding)
18
19
  end
19
20
 
20
21
  def each(&block)
@@ -33,22 +34,36 @@ module StreamLines
33
34
  lines.each { |line| block.call(line) }
34
35
  end
35
36
 
36
- @buffer.rewind
37
- block.call(@buffer.read) if @buffer.size.positive?
37
+ block.call(@buffer) if @buffer.size.positive?
38
38
  end
39
39
 
40
40
  def extract_lines(chunk)
41
- lines = chunk.split($INPUT_RECORD_SEPARATOR, -1)
41
+ encoded_chunk = @buffer + chunk.to_s.dup.force_encoding(@encoding)
42
+ lines = split_lines(encoded_chunk)
43
+ @buffer = String.new(encoding: @encoding)
44
+ @buffer << lines.pop.to_s
42
45
 
43
- if lines.length > 1
44
- @buffer.rewind
45
- lines.first.prepend(@buffer.read)
46
- @buffer = StringIO.new
47
- end
48
-
49
- @buffer << lines.pop
50
46
  lines
51
47
  end
48
+
49
+ def split_lines(encoded_chunk)
50
+ encoded_chunk.split($INPUT_RECORD_SEPARATOR, -1)
51
+ rescue ArgumentError => e
52
+ raise e unless /invalid byte sequence/.match?(e.message)
53
+
54
+ # NOTE: (jdlubrano)
55
+ # The last byte in the chunk is most likely a part of a multibyte
56
+ # character that, on its own, is an invalid byte sequence. So, we
57
+ # want to split the lines containing all valid bytes and make the
58
+ # trailing bytes the last line. The last line eventually gets added
59
+ # to the buffer, prepended to the next chunk, and, hopefully, restores
60
+ # a valid byte sequence.
61
+ last_newline_index = encoded_chunk.rindex($INPUT_RECORD_SEPARATOR)
62
+ return [encoded_chunk] if last_newline_index.nil?
63
+
64
+ valid_lines = encoded_chunk[0...last_newline_index].split($INPUT_RECORD_SEPARATOR, -1)
65
+ valid_lines + [encoded_chunk[(last_newline_index + 1)..-1]].compact
66
+ end
52
67
  end
53
68
  end
54
69
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module StreamLines
4
- VERSION = '0.3.1'
4
+ VERSION = '0.4.0'
5
5
  end
@@ -33,11 +33,12 @@ Gem::Specification.new do |spec|
33
33
  spec.add_development_dependency 'bundler-audit'
34
34
  spec.add_development_dependency 'bundler-gem_version_tasks'
35
35
  spec.add_development_dependency 'byebug'
36
+ spec.add_development_dependency 'charlock_holmes'
36
37
  spec.add_development_dependency 'get_process_mem'
37
38
  spec.add_development_dependency 'memory_profiler'
38
39
  spec.add_development_dependency 'rake', '~> 13.0'
39
40
  spec.add_development_dependency 'rspec', '~> 3.0'
40
- spec.add_development_dependency 'rubocop', '~> 0.79.0'
41
+ spec.add_development_dependency 'rubocop', '~> 0.87.1'
41
42
  spec.add_development_dependency 'simplecov', '~> 0.17'
42
43
  spec.add_development_dependency 'sinatra', '~> 2.0'
43
44
  spec.add_development_dependency 'sinatra-contrib', '~> 2.0'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: stream_lines
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joel Lubrano
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-03-04 00:00:00.000000000 Z
11
+ date: 2020-07-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: httparty
@@ -94,6 +94,20 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: charlock_holmes
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
97
111
  - !ruby/object:Gem::Dependency
98
112
  name: get_process_mem
99
113
  requirement: !ruby/object:Gem::Requirement
@@ -156,14 +170,14 @@ dependencies:
156
170
  requirements:
157
171
  - - "~>"
158
172
  - !ruby/object:Gem::Version
159
- version: 0.79.0
173
+ version: 0.87.1
160
174
  type: :development
161
175
  prerelease: false
162
176
  version_requirements: !ruby/object:Gem::Requirement
163
177
  requirements:
164
178
  - - "~>"
165
179
  - !ruby/object:Gem::Version
166
- version: 0.79.0
180
+ version: 0.87.1
167
181
  - !ruby/object:Gem::Dependency
168
182
  name: simplecov
169
183
  requirement: !ruby/object:Gem::Requirement
@@ -227,12 +241,12 @@ executables: []
227
241
  extensions: []
228
242
  extra_rdoc_files: []
229
243
  files:
244
+ - ".github/workflows/ci-tests.yml"
230
245
  - ".gitignore"
231
246
  - ".rspec"
232
247
  - ".rubocop.yml"
233
248
  - ".ruby-gemset"
234
249
  - ".ruby-version"
235
- - ".travis.yml"
236
250
  - CODE_OF_CONDUCT.md
237
251
  - Gemfile
238
252
  - Gemfile.lock
@@ -1,15 +0,0 @@
1
- ---
2
- language: ruby
3
- cache: bundler
4
- rvm:
5
- - 2.7.0
6
- - 2.6.0
7
- - 2.5.0
8
- before_install: gem install bundler -v 2.0.2
9
- script:
10
- - bundle exec bundle-audit check --update
11
- - bundle exec rubocop
12
- - bundle exec rspec
13
- branches:
14
- only:
15
- - master