feedx 0.12.7 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +2 -37
- data/.golangci.yml +13 -4
- data/.rubocop.yml +8 -14
- data/.tool-versions +1 -0
- data/Gemfile +8 -0
- data/Gemfile.lock +54 -68
- data/Makefile +3 -3
- data/README.md +3 -1
- data/compression.go +29 -0
- data/compression_test.go +73 -61
- data/consumer.go +96 -152
- data/consumer_test.go +124 -59
- data/example_test.go +140 -0
- data/feedx.gemspec +2 -10
- data/feedx.go +16 -31
- data/feedx_ext_test.go +13 -3
- data/feedx_test.go +24 -26
- data/format.go +29 -19
- data/format_test.go +84 -56
- data/go.mod +11 -7
- data/go.sum +16 -138
- data/incremental.go +122 -0
- data/incremental_test.go +62 -0
- data/lib/feedx/cache/abstract.rb +3 -3
- data/lib/feedx/cache/value.rb +6 -6
- data/lib/feedx/compression/abstract.rb +2 -2
- data/lib/feedx/compression/gzip.rb +4 -4
- data/lib/feedx/consumer.rb +8 -8
- data/lib/feedx/format/abstract.rb +6 -6
- data/lib/feedx/format/json.rb +2 -2
- data/lib/feedx/format/protobuf.rb +6 -6
- data/lib/feedx/format.rb +1 -3
- data/lib/feedx/producer.rb +11 -11
- data/lib/feedx/stream.rb +2 -2
- data/lib/feedx.rb +2 -3
- data/manifest.go +65 -0
- data/producer.go +34 -137
- data/producer_test.go +46 -60
- data/reader.go +142 -41
- data/reader_test.go +86 -35
- data/scheduler.go +176 -0
- data/scheduler_test.go +128 -0
- data/writer.go +13 -13
- data/writer_test.go +61 -44
- metadata +12 -137
- data/.github/workflows/lint.yml +0 -18
- data/ext/parquet/decoder.go +0 -59
- data/ext/parquet/decoder_test.go +0 -88
- data/ext/parquet/encoder.go +0 -27
- data/ext/parquet/encoder_test.go +0 -70
- data/ext/parquet/go.mod +0 -12
- data/ext/parquet/go.sum +0 -193
- data/ext/parquet/parquet.go +0 -78
- data/ext/parquet/parquet_test.go +0 -28
- data/ext/parquet/testdata/alltypes_plain.parquet +0 -0
- data/lib/feedx/format/parquet.rb +0 -102
- data/spec/feedx/cache/memory_spec.rb +0 -23
- data/spec/feedx/cache/value_spec.rb +0 -19
- data/spec/feedx/compression/gzip_spec.rb +0 -17
- data/spec/feedx/compression/none_spec.rb +0 -15
- data/spec/feedx/compression_spec.rb +0 -19
- data/spec/feedx/consumer_spec.rb +0 -49
- data/spec/feedx/format/abstract_spec.rb +0 -21
- data/spec/feedx/format/json_spec.rb +0 -27
- data/spec/feedx/format/parquet_spec.rb +0 -30
- data/spec/feedx/format/protobuf_spec.rb +0 -23
- data/spec/feedx/format_spec.rb +0 -21
- data/spec/feedx/producer_spec.rb +0 -74
- data/spec/feedx/stream_spec.rb +0 -109
- data/spec/spec_helper.rb +0 -57
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
require 'spec_helper'
|
|
2
|
-
|
|
3
|
-
RSpec.describe Feedx::Format::Protobuf do
|
|
4
|
-
let(:wio) { StringIO.new }
|
|
5
|
-
let(:rio) { StringIO.open(wio.string) }
|
|
6
|
-
|
|
7
|
-
it 'encode/decodes' do
|
|
8
|
-
subject.encoder wio do |enc|
|
|
9
|
-
enc.encode(Feedx::TestCase::Model.new('X'))
|
|
10
|
-
enc.encode(Feedx::TestCase::Model.new('Y'))
|
|
11
|
-
enc.encode(Feedx::TestCase::Message.new(title: 'Z'))
|
|
12
|
-
end
|
|
13
|
-
expect(wio.string.bytes).to eq([3, 10, 1, 88] + [3, 10, 1, 89] + [3, 10, 1, 90])
|
|
14
|
-
|
|
15
|
-
subject.decoder rio do |dec|
|
|
16
|
-
expect(dec.decode(Feedx::TestCase::Message)).to eq(Feedx::TestCase::Message.new(title: 'X'))
|
|
17
|
-
expect(dec.decode(Feedx::TestCase::Message)).to eq(Feedx::TestCase::Message.new(title: 'Y'))
|
|
18
|
-
expect(dec.decode(Feedx::TestCase::Message)).to eq(Feedx::TestCase::Message.new(title: 'Z'))
|
|
19
|
-
expect(dec.decode(Feedx::TestCase::Message)).to be_nil
|
|
20
|
-
expect(dec).to be_eof
|
|
21
|
-
end
|
|
22
|
-
end
|
|
23
|
-
end
|
data/spec/feedx/format_spec.rb
DELETED
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
require 'spec_helper'
|
|
2
|
-
|
|
3
|
-
RSpec.describe Feedx::Format do
|
|
4
|
-
it 'resolves' do
|
|
5
|
-
expect(described_class.resolve(:json)).to be_instance_of(described_class::JSON)
|
|
6
|
-
expect(described_class.resolve(:pb)).to be_instance_of(described_class::Protobuf)
|
|
7
|
-
expect { described_class.resolve(:txt) }.to raise_error(/invalid format txt/)
|
|
8
|
-
end
|
|
9
|
-
|
|
10
|
-
it 'detects' do
|
|
11
|
-
expect(described_class.detect('path/to/file.json')).to be_instance_of(described_class::JSON)
|
|
12
|
-
expect(described_class.detect('path/to/file.jsonz')).to be_instance_of(described_class::JSON)
|
|
13
|
-
expect(described_class.detect('path/to/file.json.gz')).to be_instance_of(described_class::JSON)
|
|
14
|
-
expect(described_class.detect('path/to/file.pb')).to be_instance_of(described_class::Protobuf)
|
|
15
|
-
expect(described_class.detect('path/to/file.pbz')).to be_instance_of(described_class::Protobuf)
|
|
16
|
-
expect(described_class.detect('path/to/file.pb.z')).to be_instance_of(described_class::Protobuf)
|
|
17
|
-
expect do
|
|
18
|
-
described_class.detect('path/to/file.txt')
|
|
19
|
-
end.to raise_error(/unable to detect format/)
|
|
20
|
-
end
|
|
21
|
-
end
|
data/spec/feedx/producer_spec.rb
DELETED
|
@@ -1,74 +0,0 @@
|
|
|
1
|
-
require 'spec_helper'
|
|
2
|
-
|
|
3
|
-
RSpec.describe Feedx::Producer do
|
|
4
|
-
let :enumerable do
|
|
5
|
-
%w[x y z].map {|t| Feedx::TestCase::Model.new(t) } * 100
|
|
6
|
-
end
|
|
7
|
-
|
|
8
|
-
let(:bucket) { BFS::Bucket::InMem.new }
|
|
9
|
-
|
|
10
|
-
before { allow(BFS).to receive(:resolve).and_return(bucket) }
|
|
11
|
-
|
|
12
|
-
it 'rejects invalid inputs' do
|
|
13
|
-
expect do
|
|
14
|
-
described_class.perform 'mock:///dir/file.txt', enum: enumerable
|
|
15
|
-
end.to raise_error(/unable to detect format/)
|
|
16
|
-
end
|
|
17
|
-
|
|
18
|
-
it 'pushes compressed JSON' do
|
|
19
|
-
size = described_class.perform 'mock:///dir/file.jsonz', enum: enumerable
|
|
20
|
-
expect(size).to be_within(20).of(166)
|
|
21
|
-
expect(bucket.info('dir/file.jsonz').size).to eq(size)
|
|
22
|
-
end
|
|
23
|
-
|
|
24
|
-
it 'pushes plain JSON' do
|
|
25
|
-
size = described_class.perform 'mock:///dir/file.json', enum: enumerable
|
|
26
|
-
expect(size).to eq(15900)
|
|
27
|
-
expect(bucket.info('dir/file.json').size).to eq(size)
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
it 'pushes compressed PB' do
|
|
31
|
-
size = described_class.perform 'mock:///dir/file.pbz', enum: enumerable
|
|
32
|
-
expect(size).to be_within(20).of(41)
|
|
33
|
-
expect(bucket.info('dir/file.pbz').size).to eq(size)
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
it 'pushes plain PB' do
|
|
37
|
-
size = described_class.perform 'mock:///dir/file.pb', enum: enumerable
|
|
38
|
-
expect(size).to eq(1200)
|
|
39
|
-
expect(bucket.info('dir/file.pb').size).to eq(size)
|
|
40
|
-
end
|
|
41
|
-
|
|
42
|
-
it 'supports factories' do
|
|
43
|
-
size = described_class.perform('mock:///dir/file.json') do
|
|
44
|
-
enumerable
|
|
45
|
-
end
|
|
46
|
-
expect(size).to eq(15900)
|
|
47
|
-
expect(bucket.info('dir/file.json').size).to eq(size)
|
|
48
|
-
end
|
|
49
|
-
|
|
50
|
-
it 'supports last-modified' do
|
|
51
|
-
described_class.perform 'mock:///dir/file.json', last_modified: Time.at(1515151515), enum: enumerable
|
|
52
|
-
expect(bucket.info('dir/file.json').metadata).to eq('X-Feedx-Last-Modified' => '1515151515000')
|
|
53
|
-
end
|
|
54
|
-
|
|
55
|
-
it 'performs conditionally' do
|
|
56
|
-
size = described_class.perform 'mock:///dir/file.json', last_modified: Time.at(1515151515), enum: enumerable
|
|
57
|
-
expect(size).to eq(15900)
|
|
58
|
-
|
|
59
|
-
size = described_class.perform 'mock:///dir/file.json', last_modified: Time.at(1515151515), enum: enumerable
|
|
60
|
-
expect(size).to eq(-1)
|
|
61
|
-
|
|
62
|
-
size = described_class.perform 'mock:///dir/file.json', last_modified: Time.at(1515151514), enum: enumerable
|
|
63
|
-
expect(size).to eq(-1)
|
|
64
|
-
|
|
65
|
-
size = described_class.perform 'mock:///dir/file.json', last_modified: Time.at(1515151516), enum: enumerable
|
|
66
|
-
expect(size).to eq(15900)
|
|
67
|
-
end
|
|
68
|
-
|
|
69
|
-
it 'accepts downstream options' do
|
|
70
|
-
expect do
|
|
71
|
-
described_class.perform 'mock:///dir/file.jsonz', enum: enumerable, x: 1, y: 'v', z: true
|
|
72
|
-
end.not_to raise_error
|
|
73
|
-
end
|
|
74
|
-
end
|
data/spec/feedx/stream_spec.rb
DELETED
|
@@ -1,109 +0,0 @@
|
|
|
1
|
-
require 'spec_helper'
|
|
2
|
-
|
|
3
|
-
RSpec.describe Feedx::Stream do
|
|
4
|
-
subject { described_class.new('mock:///dir/file.json') }
|
|
5
|
-
|
|
6
|
-
let(:bucket) { BFS::Bucket::InMem.new }
|
|
7
|
-
let(:compressed) { described_class.new('mock:///dir/file.json.gz') }
|
|
8
|
-
|
|
9
|
-
before { allow(BFS).to receive(:resolve).and_return(bucket) }
|
|
10
|
-
|
|
11
|
-
after { subject.close; compressed.close }
|
|
12
|
-
|
|
13
|
-
it 'rejects invalid inputs' do
|
|
14
|
-
expect do
|
|
15
|
-
described_class.new('mock:///dir/file.txt')
|
|
16
|
-
end.to raise_error(/unable to detect format/)
|
|
17
|
-
end
|
|
18
|
-
|
|
19
|
-
it 'accepts custom formats' do
|
|
20
|
-
format = Class.new do
|
|
21
|
-
def encoder(io, &block)
|
|
22
|
-
Feedx::Format::JSON::Encoder.open(io, &block)
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
def decoder(io, &block)
|
|
26
|
-
Feedx::Format::JSON::Decoder.open(io, &block)
|
|
27
|
-
end
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
result = described_class.open('mock:///dir/file.txt', format: format.new) do |stream|
|
|
31
|
-
stream.create {|s| s.encode Feedx::TestCase::Model.new('X') }
|
|
32
|
-
21
|
|
33
|
-
end
|
|
34
|
-
expect(result).to eq(21)
|
|
35
|
-
|
|
36
|
-
expect(bucket.read('dir/file.txt')).to eq(<<~JSON)
|
|
37
|
-
{"title":"X","updated_at":"2018-01-05 11:25:15 UTC"}
|
|
38
|
-
JSON
|
|
39
|
-
end
|
|
40
|
-
|
|
41
|
-
it 'encodes' do
|
|
42
|
-
subject.create do |s|
|
|
43
|
-
s.encode(Feedx::TestCase::Model.new('X'))
|
|
44
|
-
s.encode(Feedx::TestCase::Model.new('Y'))
|
|
45
|
-
end
|
|
46
|
-
|
|
47
|
-
expect(bucket.read('dir/file.json')).to eq(<<~JSON)
|
|
48
|
-
{"title":"X","updated_at":"2018-01-05 11:25:15 UTC"}
|
|
49
|
-
{"title":"Y","updated_at":"2018-01-05 11:25:15 UTC"}
|
|
50
|
-
JSON
|
|
51
|
-
end
|
|
52
|
-
|
|
53
|
-
it 'encodes compressed' do
|
|
54
|
-
compressed.create do |s|
|
|
55
|
-
100.times do
|
|
56
|
-
s.encode(Feedx::TestCase::Model.new('X'))
|
|
57
|
-
end
|
|
58
|
-
end
|
|
59
|
-
|
|
60
|
-
expect(bucket.info('dir/file.json.gz').size).to be_within(10).of(108)
|
|
61
|
-
end
|
|
62
|
-
|
|
63
|
-
it 'encodes with create options' do
|
|
64
|
-
subject.create metadata: { 'x' => '5' } do |s|
|
|
65
|
-
s.encode(Feedx::TestCase::Model.new('X'))
|
|
66
|
-
end
|
|
67
|
-
expect(bucket.info('dir/file.json').metadata).to eq('X' => '5')
|
|
68
|
-
end
|
|
69
|
-
|
|
70
|
-
it 'aborts encode on errors (if compressed)' do
|
|
71
|
-
stop = RuntimeError.new('STOP')
|
|
72
|
-
expect do
|
|
73
|
-
compressed.create do |s|
|
|
74
|
-
s.encode(Feedx::TestCase::Model.new('X'))
|
|
75
|
-
raise stop
|
|
76
|
-
end
|
|
77
|
-
end.to raise_error(stop)
|
|
78
|
-
|
|
79
|
-
expect(bucket.ls('**').to_a).to be_empty
|
|
80
|
-
end
|
|
81
|
-
|
|
82
|
-
it 'decodes' do
|
|
83
|
-
subject.create do |s|
|
|
84
|
-
s.encode(Feedx::TestCase::Model.new('X'))
|
|
85
|
-
s.encode(Feedx::TestCase::Model.new('Y'))
|
|
86
|
-
end
|
|
87
|
-
|
|
88
|
-
subject.open do |s|
|
|
89
|
-
expect(s.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('X'))
|
|
90
|
-
expect(s.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('Y'))
|
|
91
|
-
expect(s.decode(Feedx::TestCase::Model)).to be_nil
|
|
92
|
-
expect(s).to be_eof
|
|
93
|
-
end
|
|
94
|
-
end
|
|
95
|
-
|
|
96
|
-
it 'decodes compressed' do
|
|
97
|
-
compressed.create do |s|
|
|
98
|
-
s.encode(Feedx::TestCase::Model.new('X'))
|
|
99
|
-
s.encode(Feedx::TestCase::Model.new('Y'))
|
|
100
|
-
end
|
|
101
|
-
|
|
102
|
-
compressed.open do |s|
|
|
103
|
-
expect(s.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('X'))
|
|
104
|
-
expect(s.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('Y'))
|
|
105
|
-
expect(s.decode(Feedx::TestCase::Model)).to be_nil
|
|
106
|
-
expect(s).to be_eof
|
|
107
|
-
end
|
|
108
|
-
end
|
|
109
|
-
end
|
data/spec/spec_helper.rb
DELETED
|
@@ -1,57 +0,0 @@
|
|
|
1
|
-
require 'rspec'
|
|
2
|
-
require 'feedx'
|
|
3
|
-
require 'google/protobuf'
|
|
4
|
-
|
|
5
|
-
Google::Protobuf::DescriptorPool.generated_pool.build do
|
|
6
|
-
add_message 'com.blacksquaremedia.feedx.testcase.Message' do
|
|
7
|
-
optional :title, :string, 1
|
|
8
|
-
end
|
|
9
|
-
end
|
|
10
|
-
|
|
11
|
-
module Feedx
|
|
12
|
-
module TestCase
|
|
13
|
-
Message = Google::Protobuf::DescriptorPool.generated_pool.lookup('com.blacksquaremedia.feedx.testcase.Message').msgclass
|
|
14
|
-
|
|
15
|
-
class Model
|
|
16
|
-
attr_reader :title
|
|
17
|
-
|
|
18
|
-
def initialize(title)
|
|
19
|
-
@title = title
|
|
20
|
-
end
|
|
21
|
-
|
|
22
|
-
def to_pb(*)
|
|
23
|
-
Feedx::TestCase::Message.new title: @title
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
def ==(other)
|
|
27
|
-
title == other.title
|
|
28
|
-
end
|
|
29
|
-
alias eql? ==
|
|
30
|
-
|
|
31
|
-
def updated_at
|
|
32
|
-
Time.at(1515151515).utc
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
def from_json(data, *)
|
|
36
|
-
hash = ::JSON.parse(data)
|
|
37
|
-
@title = hash['title'] if hash.is_a?(Hash)
|
|
38
|
-
end
|
|
39
|
-
|
|
40
|
-
def to_json(*)
|
|
41
|
-
::JSON.dump(title: @title, updated_at: updated_at)
|
|
42
|
-
end
|
|
43
|
-
|
|
44
|
-
def from_parquet(rec)
|
|
45
|
-
rec.each_pair do |name, value|
|
|
46
|
-
@title = value if name == 'title'
|
|
47
|
-
end
|
|
48
|
-
end
|
|
49
|
-
|
|
50
|
-
def to_parquet(schema, *)
|
|
51
|
-
schema.fields.map do |field|
|
|
52
|
-
send(field.name)
|
|
53
|
-
end
|
|
54
|
-
end
|
|
55
|
-
end
|
|
56
|
-
end
|
|
57
|
-
end
|