stream_lines 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +27 -4
- data/lib/stream_lines.rb +1 -0
- data/lib/stream_lines/reading/json_lines.rb +30 -0
- data/lib/stream_lines/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5f86f4c30e240b9076fa0ede333d89a0974e48b57ca3c75c145031314b524973
|
4
|
+
data.tar.gz: 7ee39a6d12b3e3e54eac67f6ea1d0108ac8ecc57ad78676187fe4c3dfcfed293
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2edb6bdd8ebccbf8d99c9769303c132ae2016c628dec9fc7b499ea3cf95ef579f9781ac7c205cbf526a1a10062af5d14c56a4927058a9580b0b6c0636b186978
|
7
|
+
data.tar.gz: b56a5173a42cc761337d4d1f30116bc6e5d08f1e07ff32eb8c4edeef58a537d42949a21d696be21f3913f51413242e03a0bbe5f7e4176f8b8f63b3204722e1ee
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -6,13 +6,15 @@ An API for streaming files from remote locations one line at a time.
|
|
6
6
|
|
7
7
|
## Background
|
8
8
|
|
9
|
-
Some applications run in production environments without writable file system;
|
9
|
+
Some applications run in production environments without a writable file system;
|
10
10
|
usually this is a security measure. Furthermore, with the proliferation of
|
11
11
|
container-based production environments, containers may not have access to
|
12
12
|
tremendous amounts of memory. Thus, it can be impossible to read large files
|
13
13
|
unless you read the file into memory in small doses. A common pattern is to
|
14
14
|
use a line-delimited file like [JSON Lines](http://jsonlines.org) or a CSV
|
15
15
|
and to read the file one line at a time in order to iterate over a dataset.
|
16
|
+
This gem aims to provide an [Enumerable](https://ruby-doc.org/core-2.7.0/Enumerable.html)
|
17
|
+
interface for iterating over remote, line-delimited datasets.
|
16
18
|
|
17
19
|
## Installation
|
18
20
|
|
@@ -41,7 +43,7 @@ url = 'https://my.remote.file/file.txt'
|
|
41
43
|
stream = StreamLines::Reading::Stream.new(url)
|
42
44
|
|
43
45
|
stream.each do |line|
|
44
|
-
# Do something with the line of data
|
46
|
+
# Do something with the line of data (the line will be a String)
|
45
47
|
end
|
46
48
|
|
47
49
|
# A StreamLines::Reading::Stream object is Enumerable, so you can also use
|
@@ -61,14 +63,14 @@ url = 'https://my.remote.file/file.csv'
|
|
61
63
|
stream = StreamLines::Reading::CSV.new(url)
|
62
64
|
|
63
65
|
stream.each do |row|
|
64
|
-
# each row will be an
|
66
|
+
# each row will be an Array
|
65
67
|
end
|
66
68
|
|
67
69
|
# Supports most Ruby CSV options (see ignored options below)
|
68
70
|
stream = StreamLines::Reading::CSV.new(url, headers: true)
|
69
71
|
|
70
72
|
stream.each do |row|
|
71
|
-
# each row
|
73
|
+
# each row will be a CSV::Row object that you can access like row['column_name']
|
72
74
|
end
|
73
75
|
```
|
74
76
|
|
@@ -85,6 +87,27 @@ require additional logic in the `StreamLines::Reading::CSV#each` method.
|
|
85
87
|
Rather than attempting to implement sensible solutions for these options, I am
|
86
88
|
choosing to explicitly ignore them until there is enough outcry to support them.
|
87
89
|
|
90
|
+
##### JSON Lines/Streaming JSON
|
91
|
+
|
92
|
+
This gem provides first-class support for streaming
|
93
|
+
[JSON lines](http://jsonlines.org) from a remote URL.
|
94
|
+
|
95
|
+
```ruby
|
96
|
+
url = 'https://my.remote.file/file.jsonl'
|
97
|
+
stream = StreamLines::Reading::JSONLines.new(url)
|
98
|
+
|
99
|
+
stream.each do |row|
|
100
|
+
# each row will be a Hash
|
101
|
+
end
|
102
|
+
|
103
|
+
# Supports all Ruby JSON::parse options
|
104
|
+
stream = StreamLines::Reading::JSONLines.new(url, symbolize_names: true)
|
105
|
+
|
106
|
+
stream.each do |row|
|
107
|
+
# each row will be a Hash
|
108
|
+
end
|
109
|
+
```
|
110
|
+
|
88
111
|
## Development
|
89
112
|
|
90
113
|
After checking out the repo, run `bin/setup` to install dependencies.
|
data/lib/stream_lines.rb
CHANGED
data/lib/stream_lines/reading/json_lines.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'json'
|
4
|
+
require 'stream_lines/reading/stream'
|
5
|
+
|
6
|
+
module StreamLines
  module Reading
    # Enumerable view over a line-delimited JSON document fetched from a URL.
    #
    # Each line produced by the underlying Stream is handed to JSON.parse and
    # the parsed value is yielded to the caller.
    class JSONLines
      include Enumerable

      # @param url [String] location of the remote document; passed to Stream.new
      # @param json_options [Hash] keyword options forwarded unchanged to
      #   JSON.parse for every line (e.g. symbolize_names: true)
      def initialize(url, **json_options)
        @url = url
        @json_options = json_options
        @stream = Stream.new(url)
      end

      # Yields the parsed value of every line of the stream.
      #
      # @yieldparam row [Object] the result of JSON.parse for one line
      # @return [Enumerator] when called without a block
      # @raise [JSON::ParserError] if a line cannot be parsed by JSON.parse
      def each(&block)
        # Without a block there is nothing to call; follow the Enumerable
        # convention and return an Enumerator instead of failing on nil.
        return enum_for(:each) unless block

        @stream.each { |line| block.call(parse_line(line)) }
      end

      private

      attr_reader :url

      # Parses a single line using the options given to the constructor.
      def parse_line(line)
        JSON.parse(line, **@json_options)
      end
    end
  end
end
|
data/lib/stream_lines/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: stream_lines
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Joel Lubrano
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-02-
|
11
|
+
date: 2020-02-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: httparty
|
@@ -245,6 +245,7 @@ files:
|
|
245
245
|
- lib/stream_lines/error.rb
|
246
246
|
- lib/stream_lines/reading.rb
|
247
247
|
- lib/stream_lines/reading/csv.rb
|
248
|
+
- lib/stream_lines/reading/json_lines.rb
|
248
249
|
- lib/stream_lines/reading/stream.rb
|
249
250
|
- lib/stream_lines/version.rb
|
250
251
|
- stream_lines.gemspec
|