stream_lines 0.2.1 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +27 -4
- data/lib/stream_lines.rb +1 -0
- data/lib/stream_lines/reading/json_lines.rb +30 -0
- data/lib/stream_lines/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5f86f4c30e240b9076fa0ede333d89a0974e48b57ca3c75c145031314b524973
|
4
|
+
data.tar.gz: 7ee39a6d12b3e3e54eac67f6ea1d0108ac8ecc57ad78676187fe4c3dfcfed293
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2edb6bdd8ebccbf8d99c9769303c132ae2016c628dec9fc7b499ea3cf95ef579f9781ac7c205cbf526a1a10062af5d14c56a4927058a9580b0b6c0636b186978
|
7
|
+
data.tar.gz: b56a5173a42cc761337d4d1f30116bc6e5d08f1e07ff32eb8c4edeef58a537d42949a21d696be21f3913f51413242e03a0bbe5f7e4176f8b8f63b3204722e1ee
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -6,13 +6,15 @@ An API for streaming files from remote locations one line at a time.
|
|
6
6
|
|
7
7
|
## Background
|
8
8
|
|
9
|
-
Some applications run in production environments without writable file system;
|
9
|
+
Some applications run in production environments without a writable file system;
|
10
10
|
usually this is a security measure. Furthermore, with the proliferation of
|
11
11
|
container-based production environments, containers may not have access to
|
12
12
|
tremendous amounts of memory. Thus, it can be impossible to read large files
|
13
13
|
unless you read the file into memory in small doses. A common pattern is to
|
14
14
|
use a line-delimited file like [JSON Lines](http://jsonlines.org) or a CSV
|
15
15
|
and to read the file one line at a time in order to iterate over a dataset.
|
16
|
+
This gem aims to provide an [Enumerable](https://ruby-doc.org/core-2.7.0/Enumerable.html)
|
17
|
+
interface for iterating over remote, line-delimited datasets.
|
16
18
|
|
17
19
|
## Installation
|
18
20
|
|
@@ -41,7 +43,7 @@ url = 'https://my.remote.file/file.txt'
|
|
41
43
|
stream = StreamLines::Reading::Stream.new(url)
|
42
44
|
|
43
45
|
stream.each do |line|
|
44
|
-
# Do something with the line of data
|
46
|
+
# Do something with the line of data (the line will be a String)
|
45
47
|
end
|
46
48
|
|
47
49
|
# A StreamLines::Reading::Stream object is Enumerable, so you can also use
|
@@ -61,14 +63,14 @@ url = 'https://my.remote.file/file.csv'
|
|
61
63
|
stream = StreamLines::Reading::CSV.new(url)
|
62
64
|
|
63
65
|
stream.each do |row|
|
64
|
-
# each row will be an
|
66
|
+
# each row will be an Array
|
65
67
|
end
|
66
68
|
|
67
69
|
# Supports most Ruby CSV options (see ignored options below)
|
68
70
|
stream = StreamLines::Reading::CSV.new(url, headers: true)
|
69
71
|
|
70
72
|
stream.each do |row|
|
71
|
-
# each row
|
73
|
+
# each row will be a CSV::Row object that you can access like row['column_name']
|
72
74
|
end
|
73
75
|
```
|
74
76
|
|
@@ -85,6 +87,27 @@ require additional logic in the `StreamLines::Reading::CSV#each` method.
|
|
85
87
|
Rather than attempting to implement sensible solutions for these options, I am
|
86
88
|
choosing to explicitly ignore them until there is enough outcry to support them.
|
87
89
|
|
90
|
+
##### JSON Lines/Streaming JSON
|
91
|
+
|
92
|
+
This gem provides first-class support for streaming
|
93
|
+
[JSON lines](http://jsonlines.org) from a remote URL.
|
94
|
+
|
95
|
+
```ruby
|
96
|
+
url = 'https://my.remote.file/file.jsonl'
|
97
|
+
stream = StreamLines::Reading::JSONLines.new(url)
|
98
|
+
|
99
|
+
stream.each do |row|
|
100
|
+
# each row will be a Hash
|
101
|
+
end
|
102
|
+
|
103
|
+
# Supports all Ruby JSON::parse options
|
104
|
+
stream = StreamLines::Reading::JSONLines.new(url, symbolize_names: true)
|
105
|
+
|
106
|
+
stream.each do |row|
|
107
|
+
# each row will be a Hash
|
108
|
+
end
|
109
|
+
```
|
110
|
+
|
88
111
|
## Development
|
89
112
|
|
90
113
|
After checking out the repo, run `bin/setup` to install dependencies.
|
data/lib/stream_lines.rb
CHANGED
data/lib/stream_lines/reading/json_lines.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'json'
|
4
|
+
require 'stream_lines/reading/stream'
|
5
|
+
|
6
|
+
module StreamLines
  module Reading
    # Enumerable reader that treats every line produced by a
    # StreamLines::Reading::Stream as one JSON document and yields the
    # parsed result.
    #
    #   stream = StreamLines::Reading::JSONLines.new(url, symbolize_names: true)
    #   stream.each { |row| ... }
    class JSONLines
      include Enumerable

      # @param url [String] location handed to StreamLines::Reading::Stream
      # @param json_options [Hash] keyword options forwarded unchanged to
      #   JSON.parse for every line (e.g. symbolize_names: true)
      def initialize(url, **json_options)
        @url = url
        @json_options = json_options
        @stream = Stream.new(url)
      end

      # Iterates over the underlying stream, yielding the parsed value of
      # each line.
      #
      # @yieldparam row [Object] result of JSON.parse for a single line
      # @return [Enumerator] when called without a block
      # @return [Object] otherwise, whatever the underlying stream's #each
      #   returns
      # @raise [JSON::ParserError] if a line is not valid JSON
      def each
        # Follow the usual Enumerable convention: without a block, hand back
        # an Enumerator instead of failing on a nil block.
        return to_enum(:each) unless block_given?

        @stream.each { |line| yield parse_line(line) }
      end

      private

      attr_reader :url

      # Parses one raw line using the options supplied at construction time.
      def parse_line(line)
        JSON.parse(line, **@json_options)
      end
    end
  end
end
|
data/lib/stream_lines/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: stream_lines
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Joel Lubrano
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-02-
|
11
|
+
date: 2020-02-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: httparty
|
@@ -245,6 +245,7 @@ files:
|
|
245
245
|
- lib/stream_lines/error.rb
|
246
246
|
- lib/stream_lines/reading.rb
|
247
247
|
- lib/stream_lines/reading/csv.rb
|
248
|
+
- lib/stream_lines/reading/json_lines.rb
|
248
249
|
- lib/stream_lines/reading/stream.rb
|
249
250
|
- lib/stream_lines/version.rb
|
250
251
|
- stream_lines.gemspec
|