feedx 0.12.6 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +2 -49
- data/.golangci.yml +13 -0
- data/.rubocop.yml +8 -14
- data/.tool-versions +1 -0
- data/Gemfile +8 -0
- data/Gemfile.lock +54 -69
- data/Makefile +3 -3
- data/README.md +3 -1
- data/compression.go +29 -0
- data/compression_test.go +73 -61
- data/consumer.go +96 -152
- data/consumer_test.go +124 -59
- data/example_test.go +140 -0
- data/feedx.gemspec +3 -10
- data/feedx.go +16 -31
- data/feedx_ext_test.go +13 -3
- data/feedx_test.go +24 -24
- data/format.go +29 -19
- data/format_test.go +84 -56
- data/go.mod +12 -10
- data/go.sum +18 -142
- data/incremental.go +122 -0
- data/incremental_test.go +62 -0
- data/lib/feedx/cache/abstract.rb +3 -3
- data/lib/feedx/cache/value.rb +6 -6
- data/lib/feedx/compression/abstract.rb +2 -2
- data/lib/feedx/compression/gzip.rb +4 -4
- data/lib/feedx/consumer.rb +8 -8
- data/lib/feedx/format/abstract.rb +6 -6
- data/lib/feedx/format/json.rb +2 -2
- data/lib/feedx/format/protobuf.rb +6 -6
- data/lib/feedx/format.rb +1 -3
- data/lib/feedx/producer.rb +11 -11
- data/lib/feedx/stream.rb +2 -2
- data/lib/feedx.rb +2 -3
- data/manifest.go +65 -0
- data/producer.go +34 -137
- data/producer_test.go +46 -60
- data/reader.go +142 -41
- data/reader_test.go +86 -35
- data/scheduler.go +176 -0
- data/scheduler_test.go +128 -0
- data/writer.go +13 -13
- data/writer_test.go +61 -44
- metadata +15 -137
- data/ext/parquet/decoder.go +0 -170
- data/ext/parquet/decoder_test.go +0 -88
- data/ext/parquet/go.mod +0 -10
- data/ext/parquet/go.sum +0 -154
- data/ext/parquet/parquet.go +0 -78
- data/ext/parquet/parquet_test.go +0 -28
- data/ext/parquet/reader.go +0 -89
- data/ext/parquet/testdata/alltypes_plain.parquet +0 -0
- data/ext/parquet/types.go +0 -51
- data/lib/feedx/format/parquet.rb +0 -102
- data/spec/feedx/cache/memory_spec.rb +0 -23
- data/spec/feedx/cache/value_spec.rb +0 -19
- data/spec/feedx/compression/gzip_spec.rb +0 -17
- data/spec/feedx/compression/none_spec.rb +0 -15
- data/spec/feedx/compression_spec.rb +0 -19
- data/spec/feedx/consumer_spec.rb +0 -49
- data/spec/feedx/format/abstract_spec.rb +0 -21
- data/spec/feedx/format/json_spec.rb +0 -27
- data/spec/feedx/format/parquet_spec.rb +0 -30
- data/spec/feedx/format/protobuf_spec.rb +0 -23
- data/spec/feedx/format_spec.rb +0 -21
- data/spec/feedx/producer_spec.rb +0 -74
- data/spec/feedx/stream_spec.rb +0 -109
- data/spec/spec_helper.rb +0 -57
data/spec/feedx/producer_spec.rb
DELETED
|
@@ -1,74 +0,0 @@
|
|
|
1
|
-
require 'spec_helper'
|
|
2
|
-
|
|
3
|
-
RSpec.describe Feedx::Producer do
|
|
4
|
-
let :enumerable do
|
|
5
|
-
%w[x y z].map {|t| Feedx::TestCase::Model.new(t) } * 100
|
|
6
|
-
end
|
|
7
|
-
|
|
8
|
-
let(:bucket) { BFS::Bucket::InMem.new }
|
|
9
|
-
|
|
10
|
-
before { allow(BFS).to receive(:resolve).and_return(bucket) }
|
|
11
|
-
|
|
12
|
-
it 'rejects invalid inputs' do
|
|
13
|
-
expect do
|
|
14
|
-
described_class.perform 'mock:///dir/file.txt', enum: enumerable
|
|
15
|
-
end.to raise_error(/unable to detect format/)
|
|
16
|
-
end
|
|
17
|
-
|
|
18
|
-
it 'pushes compressed JSON' do
|
|
19
|
-
size = described_class.perform 'mock:///dir/file.jsonz', enum: enumerable
|
|
20
|
-
expect(size).to be_within(20).of(166)
|
|
21
|
-
expect(bucket.info('dir/file.jsonz').size).to eq(size)
|
|
22
|
-
end
|
|
23
|
-
|
|
24
|
-
it 'pushes plain JSON' do
|
|
25
|
-
size = described_class.perform 'mock:///dir/file.json', enum: enumerable
|
|
26
|
-
expect(size).to eq(15900)
|
|
27
|
-
expect(bucket.info('dir/file.json').size).to eq(size)
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
it 'pushes compressed PB' do
|
|
31
|
-
size = described_class.perform 'mock:///dir/file.pbz', enum: enumerable
|
|
32
|
-
expect(size).to be_within(20).of(41)
|
|
33
|
-
expect(bucket.info('dir/file.pbz').size).to eq(size)
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
it 'pushes plain PB' do
|
|
37
|
-
size = described_class.perform 'mock:///dir/file.pb', enum: enumerable
|
|
38
|
-
expect(size).to eq(1200)
|
|
39
|
-
expect(bucket.info('dir/file.pb').size).to eq(size)
|
|
40
|
-
end
|
|
41
|
-
|
|
42
|
-
it 'supports factories' do
|
|
43
|
-
size = described_class.perform('mock:///dir/file.json') do
|
|
44
|
-
enumerable
|
|
45
|
-
end
|
|
46
|
-
expect(size).to eq(15900)
|
|
47
|
-
expect(bucket.info('dir/file.json').size).to eq(size)
|
|
48
|
-
end
|
|
49
|
-
|
|
50
|
-
it 'supports last-modified' do
|
|
51
|
-
described_class.perform 'mock:///dir/file.json', last_modified: Time.at(1515151515), enum: enumerable
|
|
52
|
-
expect(bucket.info('dir/file.json').metadata).to eq('X-Feedx-Last-Modified' => '1515151515000')
|
|
53
|
-
end
|
|
54
|
-
|
|
55
|
-
it 'performs conditionally' do
|
|
56
|
-
size = described_class.perform 'mock:///dir/file.json', last_modified: Time.at(1515151515), enum: enumerable
|
|
57
|
-
expect(size).to eq(15900)
|
|
58
|
-
|
|
59
|
-
size = described_class.perform 'mock:///dir/file.json', last_modified: Time.at(1515151515), enum: enumerable
|
|
60
|
-
expect(size).to eq(-1)
|
|
61
|
-
|
|
62
|
-
size = described_class.perform 'mock:///dir/file.json', last_modified: Time.at(1515151514), enum: enumerable
|
|
63
|
-
expect(size).to eq(-1)
|
|
64
|
-
|
|
65
|
-
size = described_class.perform 'mock:///dir/file.json', last_modified: Time.at(1515151516), enum: enumerable
|
|
66
|
-
expect(size).to eq(15900)
|
|
67
|
-
end
|
|
68
|
-
|
|
69
|
-
it 'accepts downstream options' do
|
|
70
|
-
expect do
|
|
71
|
-
described_class.perform 'mock:///dir/file.jsonz', enum: enumerable, x: 1, y: 'v', z: true
|
|
72
|
-
end.not_to raise_error
|
|
73
|
-
end
|
|
74
|
-
end
|
data/spec/feedx/stream_spec.rb
DELETED
|
@@ -1,109 +0,0 @@
|
|
|
1
|
-
require 'spec_helper'
|
|
2
|
-
|
|
3
|
-
RSpec.describe Feedx::Stream do
|
|
4
|
-
subject { described_class.new('mock:///dir/file.json') }
|
|
5
|
-
|
|
6
|
-
let(:bucket) { BFS::Bucket::InMem.new }
|
|
7
|
-
let(:compressed) { described_class.new('mock:///dir/file.json.gz') }
|
|
8
|
-
|
|
9
|
-
before { allow(BFS).to receive(:resolve).and_return(bucket) }
|
|
10
|
-
|
|
11
|
-
after { subject.close; compressed.close }
|
|
12
|
-
|
|
13
|
-
it 'rejects invalid inputs' do
|
|
14
|
-
expect do
|
|
15
|
-
described_class.new('mock:///dir/file.txt')
|
|
16
|
-
end.to raise_error(/unable to detect format/)
|
|
17
|
-
end
|
|
18
|
-
|
|
19
|
-
it 'accepts custom formats' do
|
|
20
|
-
format = Class.new do
|
|
21
|
-
def encoder(io, &block)
|
|
22
|
-
Feedx::Format::JSON::Encoder.open(io, &block)
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
def decoder(io, &block)
|
|
26
|
-
Feedx::Format::JSON::Decoder.open(io, &block)
|
|
27
|
-
end
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
result = described_class.open('mock:///dir/file.txt', format: format.new) do |stream|
|
|
31
|
-
stream.create {|s| s.encode Feedx::TestCase::Model.new('X') }
|
|
32
|
-
21
|
|
33
|
-
end
|
|
34
|
-
expect(result).to eq(21)
|
|
35
|
-
|
|
36
|
-
expect(bucket.read('dir/file.txt')).to eq(<<~JSON)
|
|
37
|
-
{"title":"X","updated_at":"2018-01-05 11:25:15 UTC"}
|
|
38
|
-
JSON
|
|
39
|
-
end
|
|
40
|
-
|
|
41
|
-
it 'encodes' do
|
|
42
|
-
subject.create do |s|
|
|
43
|
-
s.encode(Feedx::TestCase::Model.new('X'))
|
|
44
|
-
s.encode(Feedx::TestCase::Model.new('Y'))
|
|
45
|
-
end
|
|
46
|
-
|
|
47
|
-
expect(bucket.read('dir/file.json')).to eq(<<~JSON)
|
|
48
|
-
{"title":"X","updated_at":"2018-01-05 11:25:15 UTC"}
|
|
49
|
-
{"title":"Y","updated_at":"2018-01-05 11:25:15 UTC"}
|
|
50
|
-
JSON
|
|
51
|
-
end
|
|
52
|
-
|
|
53
|
-
it 'encodes compressed' do
|
|
54
|
-
compressed.create do |s|
|
|
55
|
-
100.times do
|
|
56
|
-
s.encode(Feedx::TestCase::Model.new('X'))
|
|
57
|
-
end
|
|
58
|
-
end
|
|
59
|
-
|
|
60
|
-
expect(bucket.info('dir/file.json.gz').size).to be_within(10).of(108)
|
|
61
|
-
end
|
|
62
|
-
|
|
63
|
-
it 'encodes with create options' do
|
|
64
|
-
subject.create metadata: { 'x' => '5' } do |s|
|
|
65
|
-
s.encode(Feedx::TestCase::Model.new('X'))
|
|
66
|
-
end
|
|
67
|
-
expect(bucket.info('dir/file.json').metadata).to eq('X' => '5')
|
|
68
|
-
end
|
|
69
|
-
|
|
70
|
-
it 'aborts encode on errors (if compressed)' do
|
|
71
|
-
stop = RuntimeError.new('STOP')
|
|
72
|
-
expect do
|
|
73
|
-
compressed.create do |s|
|
|
74
|
-
s.encode(Feedx::TestCase::Model.new('X'))
|
|
75
|
-
raise stop
|
|
76
|
-
end
|
|
77
|
-
end.to raise_error(stop)
|
|
78
|
-
|
|
79
|
-
expect(bucket.ls('**').to_a).to be_empty
|
|
80
|
-
end
|
|
81
|
-
|
|
82
|
-
it 'decodes' do
|
|
83
|
-
subject.create do |s|
|
|
84
|
-
s.encode(Feedx::TestCase::Model.new('X'))
|
|
85
|
-
s.encode(Feedx::TestCase::Model.new('Y'))
|
|
86
|
-
end
|
|
87
|
-
|
|
88
|
-
subject.open do |s|
|
|
89
|
-
expect(s.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('X'))
|
|
90
|
-
expect(s.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('Y'))
|
|
91
|
-
expect(s.decode(Feedx::TestCase::Model)).to be_nil
|
|
92
|
-
expect(s).to be_eof
|
|
93
|
-
end
|
|
94
|
-
end
|
|
95
|
-
|
|
96
|
-
it 'decodes compressed' do
|
|
97
|
-
compressed.create do |s|
|
|
98
|
-
s.encode(Feedx::TestCase::Model.new('X'))
|
|
99
|
-
s.encode(Feedx::TestCase::Model.new('Y'))
|
|
100
|
-
end
|
|
101
|
-
|
|
102
|
-
compressed.open do |s|
|
|
103
|
-
expect(s.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('X'))
|
|
104
|
-
expect(s.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('Y'))
|
|
105
|
-
expect(s.decode(Feedx::TestCase::Model)).to be_nil
|
|
106
|
-
expect(s).to be_eof
|
|
107
|
-
end
|
|
108
|
-
end
|
|
109
|
-
end
|
data/spec/spec_helper.rb
DELETED
|
@@ -1,57 +0,0 @@
|
|
|
1
|
-
require 'rspec'
|
|
2
|
-
require 'feedx'
|
|
3
|
-
require 'google/protobuf'
|
|
4
|
-
|
|
5
|
-
Google::Protobuf::DescriptorPool.generated_pool.build do
|
|
6
|
-
add_message 'com.blacksquaremedia.feedx.testcase.Message' do
|
|
7
|
-
optional :title, :string, 1
|
|
8
|
-
end
|
|
9
|
-
end
|
|
10
|
-
|
|
11
|
-
module Feedx
|
|
12
|
-
module TestCase
|
|
13
|
-
Message = Google::Protobuf::DescriptorPool.generated_pool.lookup('com.blacksquaremedia.feedx.testcase.Message').msgclass
|
|
14
|
-
|
|
15
|
-
class Model
|
|
16
|
-
attr_reader :title
|
|
17
|
-
|
|
18
|
-
def initialize(title)
|
|
19
|
-
@title = title
|
|
20
|
-
end
|
|
21
|
-
|
|
22
|
-
def to_pb(*)
|
|
23
|
-
Feedx::TestCase::Message.new title: @title
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
def ==(other)
|
|
27
|
-
title == other.title
|
|
28
|
-
end
|
|
29
|
-
alias eql? ==
|
|
30
|
-
|
|
31
|
-
def updated_at
|
|
32
|
-
Time.at(1515151515).utc
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
def from_json(data, *)
|
|
36
|
-
hash = ::JSON.parse(data)
|
|
37
|
-
@title = hash['title'] if hash.is_a?(Hash)
|
|
38
|
-
end
|
|
39
|
-
|
|
40
|
-
def to_json(*)
|
|
41
|
-
::JSON.dump(title: @title, updated_at: updated_at)
|
|
42
|
-
end
|
|
43
|
-
|
|
44
|
-
def from_parquet(rec)
|
|
45
|
-
rec.each_pair do |name, value|
|
|
46
|
-
@title = value if name == 'title'
|
|
47
|
-
end
|
|
48
|
-
end
|
|
49
|
-
|
|
50
|
-
def to_parquet(schema, *)
|
|
51
|
-
schema.fields.map do |field|
|
|
52
|
-
send(field.name)
|
|
53
|
-
end
|
|
54
|
-
end
|
|
55
|
-
end
|
|
56
|
-
end
|
|
57
|
-
end
|