feedx 0.12.7 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/test.yml +2 -37
  3. data/.golangci.yml +13 -4
  4. data/.rubocop.yml +8 -14
  5. data/.tool-versions +1 -0
  6. data/Gemfile +8 -0
  7. data/Gemfile.lock +54 -68
  8. data/Makefile +3 -3
  9. data/README.md +3 -1
  10. data/compression.go +29 -0
  11. data/compression_test.go +73 -61
  12. data/consumer.go +96 -152
  13. data/consumer_test.go +124 -59
  14. data/example_test.go +140 -0
  15. data/feedx.gemspec +2 -10
  16. data/feedx.go +16 -31
  17. data/feedx_ext_test.go +13 -3
  18. data/feedx_test.go +24 -26
  19. data/format.go +29 -19
  20. data/format_test.go +84 -56
  21. data/go.mod +11 -7
  22. data/go.sum +16 -138
  23. data/incremental.go +122 -0
  24. data/incremental_test.go +62 -0
  25. data/lib/feedx/cache/abstract.rb +3 -3
  26. data/lib/feedx/cache/value.rb +6 -6
  27. data/lib/feedx/compression/abstract.rb +2 -2
  28. data/lib/feedx/compression/gzip.rb +4 -4
  29. data/lib/feedx/consumer.rb +8 -8
  30. data/lib/feedx/format/abstract.rb +6 -6
  31. data/lib/feedx/format/json.rb +2 -2
  32. data/lib/feedx/format/protobuf.rb +6 -6
  33. data/lib/feedx/format.rb +1 -3
  34. data/lib/feedx/producer.rb +11 -11
  35. data/lib/feedx/stream.rb +2 -2
  36. data/lib/feedx.rb +2 -3
  37. data/manifest.go +65 -0
  38. data/producer.go +34 -137
  39. data/producer_test.go +46 -60
  40. data/reader.go +142 -41
  41. data/reader_test.go +86 -35
  42. data/scheduler.go +176 -0
  43. data/scheduler_test.go +128 -0
  44. data/writer.go +13 -13
  45. data/writer_test.go +61 -44
  46. metadata +12 -137
  47. data/.github/workflows/lint.yml +0 -18
  48. data/ext/parquet/decoder.go +0 -59
  49. data/ext/parquet/decoder_test.go +0 -88
  50. data/ext/parquet/encoder.go +0 -27
  51. data/ext/parquet/encoder_test.go +0 -70
  52. data/ext/parquet/go.mod +0 -12
  53. data/ext/parquet/go.sum +0 -193
  54. data/ext/parquet/parquet.go +0 -78
  55. data/ext/parquet/parquet_test.go +0 -28
  56. data/ext/parquet/testdata/alltypes_plain.parquet +0 -0
  57. data/lib/feedx/format/parquet.rb +0 -102
  58. data/spec/feedx/cache/memory_spec.rb +0 -23
  59. data/spec/feedx/cache/value_spec.rb +0 -19
  60. data/spec/feedx/compression/gzip_spec.rb +0 -17
  61. data/spec/feedx/compression/none_spec.rb +0 -15
  62. data/spec/feedx/compression_spec.rb +0 -19
  63. data/spec/feedx/consumer_spec.rb +0 -49
  64. data/spec/feedx/format/abstract_spec.rb +0 -21
  65. data/spec/feedx/format/json_spec.rb +0 -27
  66. data/spec/feedx/format/parquet_spec.rb +0 -30
  67. data/spec/feedx/format/protobuf_spec.rb +0 -23
  68. data/spec/feedx/format_spec.rb +0 -21
  69. data/spec/feedx/producer_spec.rb +0 -74
  70. data/spec/feedx/stream_spec.rb +0 -109
  71. data/spec/spec_helper.rb +0 -57
data/spec/feedx/format/protobuf_spec.rb DELETED
@@ -1,23 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe Feedx::Format::Protobuf do
4
- let(:wio) { StringIO.new }
5
- let(:rio) { StringIO.open(wio.string) }
6
-
7
- it 'encode/decodes' do
8
- subject.encoder wio do |enc|
9
- enc.encode(Feedx::TestCase::Model.new('X'))
10
- enc.encode(Feedx::TestCase::Model.new('Y'))
11
- enc.encode(Feedx::TestCase::Message.new(title: 'Z'))
12
- end
13
- expect(wio.string.bytes).to eq([3, 10, 1, 88] + [3, 10, 1, 89] + [3, 10, 1, 90])
14
-
15
- subject.decoder rio do |dec|
16
- expect(dec.decode(Feedx::TestCase::Message)).to eq(Feedx::TestCase::Message.new(title: 'X'))
17
- expect(dec.decode(Feedx::TestCase::Message)).to eq(Feedx::TestCase::Message.new(title: 'Y'))
18
- expect(dec.decode(Feedx::TestCase::Message)).to eq(Feedx::TestCase::Message.new(title: 'Z'))
19
- expect(dec.decode(Feedx::TestCase::Message)).to be_nil
20
- expect(dec).to be_eof
21
- end
22
- end
23
- end
data/spec/feedx/format_spec.rb DELETED
@@ -1,21 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe Feedx::Format do
4
- it 'resolves' do
5
- expect(described_class.resolve(:json)).to be_instance_of(described_class::JSON)
6
- expect(described_class.resolve(:pb)).to be_instance_of(described_class::Protobuf)
7
- expect { described_class.resolve(:txt) }.to raise_error(/invalid format txt/)
8
- end
9
-
10
- it 'detects' do
11
- expect(described_class.detect('path/to/file.json')).to be_instance_of(described_class::JSON)
12
- expect(described_class.detect('path/to/file.jsonz')).to be_instance_of(described_class::JSON)
13
- expect(described_class.detect('path/to/file.json.gz')).to be_instance_of(described_class::JSON)
14
- expect(described_class.detect('path/to/file.pb')).to be_instance_of(described_class::Protobuf)
15
- expect(described_class.detect('path/to/file.pbz')).to be_instance_of(described_class::Protobuf)
16
- expect(described_class.detect('path/to/file.pb.z')).to be_instance_of(described_class::Protobuf)
17
- expect do
18
- described_class.detect('path/to/file.txt')
19
- end.to raise_error(/unable to detect format/)
20
- end
21
- end
data/spec/feedx/producer_spec.rb DELETED
@@ -1,74 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe Feedx::Producer do
4
- let :enumerable do
5
- %w[x y z].map {|t| Feedx::TestCase::Model.new(t) } * 100
6
- end
7
-
8
- let(:bucket) { BFS::Bucket::InMem.new }
9
-
10
- before { allow(BFS).to receive(:resolve).and_return(bucket) }
11
-
12
- it 'rejects invalid inputs' do
13
- expect do
14
- described_class.perform 'mock:///dir/file.txt', enum: enumerable
15
- end.to raise_error(/unable to detect format/)
16
- end
17
-
18
- it 'pushes compressed JSON' do
19
- size = described_class.perform 'mock:///dir/file.jsonz', enum: enumerable
20
- expect(size).to be_within(20).of(166)
21
- expect(bucket.info('dir/file.jsonz').size).to eq(size)
22
- end
23
-
24
- it 'pushes plain JSON' do
25
- size = described_class.perform 'mock:///dir/file.json', enum: enumerable
26
- expect(size).to eq(15900)
27
- expect(bucket.info('dir/file.json').size).to eq(size)
28
- end
29
-
30
- it 'pushes compressed PB' do
31
- size = described_class.perform 'mock:///dir/file.pbz', enum: enumerable
32
- expect(size).to be_within(20).of(41)
33
- expect(bucket.info('dir/file.pbz').size).to eq(size)
34
- end
35
-
36
- it 'pushes plain PB' do
37
- size = described_class.perform 'mock:///dir/file.pb', enum: enumerable
38
- expect(size).to eq(1200)
39
- expect(bucket.info('dir/file.pb').size).to eq(size)
40
- end
41
-
42
- it 'supports factories' do
43
- size = described_class.perform('mock:///dir/file.json') do
44
- enumerable
45
- end
46
- expect(size).to eq(15900)
47
- expect(bucket.info('dir/file.json').size).to eq(size)
48
- end
49
-
50
- it 'supports last-modified' do
51
- described_class.perform 'mock:///dir/file.json', last_modified: Time.at(1515151515), enum: enumerable
52
- expect(bucket.info('dir/file.json').metadata).to eq('X-Feedx-Last-Modified' => '1515151515000')
53
- end
54
-
55
- it 'performs conditionally' do
56
- size = described_class.perform 'mock:///dir/file.json', last_modified: Time.at(1515151515), enum: enumerable
57
- expect(size).to eq(15900)
58
-
59
- size = described_class.perform 'mock:///dir/file.json', last_modified: Time.at(1515151515), enum: enumerable
60
- expect(size).to eq(-1)
61
-
62
- size = described_class.perform 'mock:///dir/file.json', last_modified: Time.at(1515151514), enum: enumerable
63
- expect(size).to eq(-1)
64
-
65
- size = described_class.perform 'mock:///dir/file.json', last_modified: Time.at(1515151516), enum: enumerable
66
- expect(size).to eq(15900)
67
- end
68
-
69
- it 'accepts downstream options' do
70
- expect do
71
- described_class.perform 'mock:///dir/file.jsonz', enum: enumerable, x: 1, y: 'v', z: true
72
- end.not_to raise_error
73
- end
74
- end
data/spec/feedx/stream_spec.rb DELETED
@@ -1,109 +0,0 @@
1
- require 'spec_helper'
2
-
3
- RSpec.describe Feedx::Stream do
4
- subject { described_class.new('mock:///dir/file.json') }
5
-
6
- let(:bucket) { BFS::Bucket::InMem.new }
7
- let(:compressed) { described_class.new('mock:///dir/file.json.gz') }
8
-
9
- before { allow(BFS).to receive(:resolve).and_return(bucket) }
10
-
11
- after { subject.close; compressed.close }
12
-
13
- it 'rejects invalid inputs' do
14
- expect do
15
- described_class.new('mock:///dir/file.txt')
16
- end.to raise_error(/unable to detect format/)
17
- end
18
-
19
- it 'accepts custom formats' do
20
- format = Class.new do
21
- def encoder(io, &block)
22
- Feedx::Format::JSON::Encoder.open(io, &block)
23
- end
24
-
25
- def decoder(io, &block)
26
- Feedx::Format::JSON::Decoder.open(io, &block)
27
- end
28
- end
29
-
30
- result = described_class.open('mock:///dir/file.txt', format: format.new) do |stream|
31
- stream.create {|s| s.encode Feedx::TestCase::Model.new('X') }
32
- 21
33
- end
34
- expect(result).to eq(21)
35
-
36
- expect(bucket.read('dir/file.txt')).to eq(<<~JSON)
37
- {"title":"X","updated_at":"2018-01-05 11:25:15 UTC"}
38
- JSON
39
- end
40
-
41
- it 'encodes' do
42
- subject.create do |s|
43
- s.encode(Feedx::TestCase::Model.new('X'))
44
- s.encode(Feedx::TestCase::Model.new('Y'))
45
- end
46
-
47
- expect(bucket.read('dir/file.json')).to eq(<<~JSON)
48
- {"title":"X","updated_at":"2018-01-05 11:25:15 UTC"}
49
- {"title":"Y","updated_at":"2018-01-05 11:25:15 UTC"}
50
- JSON
51
- end
52
-
53
- it 'encodes compressed' do
54
- compressed.create do |s|
55
- 100.times do
56
- s.encode(Feedx::TestCase::Model.new('X'))
57
- end
58
- end
59
-
60
- expect(bucket.info('dir/file.json.gz').size).to be_within(10).of(108)
61
- end
62
-
63
- it 'encodes with create options' do
64
- subject.create metadata: { 'x' => '5' } do |s|
65
- s.encode(Feedx::TestCase::Model.new('X'))
66
- end
67
- expect(bucket.info('dir/file.json').metadata).to eq('X' => '5')
68
- end
69
-
70
- it 'aborts encode on errors (if compressed)' do
71
- stop = RuntimeError.new('STOP')
72
- expect do
73
- compressed.create do |s|
74
- s.encode(Feedx::TestCase::Model.new('X'))
75
- raise stop
76
- end
77
- end.to raise_error(stop)
78
-
79
- expect(bucket.ls('**').to_a).to be_empty
80
- end
81
-
82
- it 'decodes' do
83
- subject.create do |s|
84
- s.encode(Feedx::TestCase::Model.new('X'))
85
- s.encode(Feedx::TestCase::Model.new('Y'))
86
- end
87
-
88
- subject.open do |s|
89
- expect(s.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('X'))
90
- expect(s.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('Y'))
91
- expect(s.decode(Feedx::TestCase::Model)).to be_nil
92
- expect(s).to be_eof
93
- end
94
- end
95
-
96
- it 'decodes compressed' do
97
- compressed.create do |s|
98
- s.encode(Feedx::TestCase::Model.new('X'))
99
- s.encode(Feedx::TestCase::Model.new('Y'))
100
- end
101
-
102
- compressed.open do |s|
103
- expect(s.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('X'))
104
- expect(s.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('Y'))
105
- expect(s.decode(Feedx::TestCase::Model)).to be_nil
106
- expect(s).to be_eof
107
- end
108
- end
109
- end
data/spec/spec_helper.rb DELETED
@@ -1,57 +0,0 @@
1
- require 'rspec'
2
- require 'feedx'
3
- require 'google/protobuf'
4
-
5
- Google::Protobuf::DescriptorPool.generated_pool.build do
6
- add_message 'com.blacksquaremedia.feedx.testcase.Message' do
7
- optional :title, :string, 1
8
- end
9
- end
10
-
11
- module Feedx
12
- module TestCase
13
- Message = Google::Protobuf::DescriptorPool.generated_pool.lookup('com.blacksquaremedia.feedx.testcase.Message').msgclass
14
-
15
- class Model
16
- attr_reader :title
17
-
18
- def initialize(title)
19
- @title = title
20
- end
21
-
22
- def to_pb(*)
23
- Feedx::TestCase::Message.new title: @title
24
- end
25
-
26
- def ==(other)
27
- title == other.title
28
- end
29
- alias eql? ==
30
-
31
- def updated_at
32
- Time.at(1515151515).utc
33
- end
34
-
35
- def from_json(data, *)
36
- hash = ::JSON.parse(data)
37
- @title = hash['title'] if hash.is_a?(Hash)
38
- end
39
-
40
- def to_json(*)
41
- ::JSON.dump(title: @title, updated_at: updated_at)
42
- end
43
-
44
- def from_parquet(rec)
45
- rec.each_pair do |name, value|
46
- @title = value if name == 'title'
47
- end
48
- end
49
-
50
- def to_parquet(schema, *)
51
- schema.fields.map do |field|
52
- send(field.name)
53
- end
54
- end
55
- end
56
- end
57
- end