feedx 0.10.0 → 0.12.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/.rubocop.yml +2 -0
  4. data/.travis.yml +13 -6
  5. data/Gemfile.lock +43 -20
  6. data/Makefile +5 -0
  7. data/consumer_test.go +5 -5
  8. data/feedx.gemspec +3 -2
  9. data/feedx_test.go +12 -9
  10. data/format.go +16 -16
  11. data/format_test.go +6 -7
  12. data/go.mod +5 -10
  13. data/go.sum +43 -24
  14. data/internal/testdata/testdata.pb.go +124 -0
  15. data/internal/testdata/testdata.proto +15 -0
  16. data/lib/feedx/cache/abstract.rb +2 -2
  17. data/lib/feedx/compression.rb +11 -4
  18. data/lib/feedx/compression/abstract.rb +2 -2
  19. data/lib/feedx/compression/gzip.rb +14 -2
  20. data/lib/feedx/compression/none.rb +4 -4
  21. data/lib/feedx/consumer.rb +17 -11
  22. data/lib/feedx/format.rb +18 -9
  23. data/lib/feedx/format/abstract.rb +42 -13
  24. data/lib/feedx/format/json.rb +12 -8
  25. data/lib/feedx/format/parquet.rb +102 -0
  26. data/lib/feedx/format/protobuf.rb +16 -8
  27. data/lib/feedx/producer.rb +20 -14
  28. data/lib/feedx/stream.rb +41 -25
  29. data/producer_test.go +1 -2
  30. data/reader_test.go +7 -8
  31. data/spec/feedx/compression/gzip_spec.rb +4 -2
  32. data/spec/feedx/compression/none_spec.rb +2 -2
  33. data/spec/feedx/compression_spec.rb +9 -9
  34. data/spec/feedx/consumer_spec.rb +6 -3
  35. data/spec/feedx/format/abstract_spec.rb +11 -8
  36. data/spec/feedx/format/json_spec.rb +12 -11
  37. data/spec/feedx/format/parquet_spec.rb +30 -0
  38. data/spec/feedx/format/protobuf_spec.rb +12 -11
  39. data/spec/feedx/format_spec.rb +8 -8
  40. data/spec/feedx/producer_spec.rb +6 -0
  41. data/spec/feedx/stream_spec.rb +26 -3
  42. data/spec/spec_helper.rb +17 -1
  43. data/writer_test.go +1 -1
  44. metadata +22 -3
@@ -1,26 +1,27 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe Feedx::Format::JSON do
4
- subject { described_class.new(wio) }
5
4
  let(:wio) { StringIO.new }
5
+ let(:rio) { StringIO.open(wio.string) }
6
6
 
7
7
  it 'should encode/decode' do
8
- subject.encode(Feedx::TestCase::Model.new('X'))
9
- subject.encode(Feedx::TestCase::Model.new('Y'))
10
- subject.encode(Feedx::TestCase::Message.new(title: 'Z'))
8
+ subject.encoder wio do |enc|
9
+ enc.encode(Feedx::TestCase::Model.new('X'))
10
+ enc.encode(Feedx::TestCase::Model.new('Y'))
11
+ enc.encode(Feedx::TestCase::Message.new(title: 'Z'))
12
+ end
11
13
  expect(wio.string.lines).to eq [
12
14
  %({"title":"X","updated_at":"2018-01-05 11:25:15 UTC"}\n),
13
15
  %({"title":"Y","updated_at":"2018-01-05 11:25:15 UTC"}\n),
14
16
  %({"title":"Z"}\n),
15
17
  ]
16
18
 
17
- StringIO.open(wio.string) do |rio|
18
- fmt = described_class.new(rio)
19
- expect(fmt.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('X'))
20
- expect(fmt.decode(Feedx::TestCase::Model.new('O'))).to eq(Feedx::TestCase::Model.new('Y'))
21
- expect(fmt.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('Z'))
22
- expect(fmt.decode(Feedx::TestCase::Model)).to be_nil
23
- expect(fmt).to be_eof
19
+ subject.decoder rio do |dec|
20
+ expect(dec.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('X'))
21
+ expect(dec.decode(Feedx::TestCase::Model.new('O'))).to eq(Feedx::TestCase::Model.new('Y'))
22
+ expect(dec.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('Z'))
23
+ expect(dec.decode(Feedx::TestCase::Model)).to be_nil
24
+ expect(dec).to be_eof
24
25
  end
25
26
  end
26
27
  end
@@ -0,0 +1,30 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe Feedx::Format::Parquet do
4
+ let(:wio) { StringIO.new }
5
+ let(:rio) { StringIO.open(wio.string) }
6
+
7
+ let(:schema) do
8
+ Arrow::Schema.new([
9
+ Arrow::Field.new('title', :string),
10
+ Arrow::Field.new('updated_at', type: :timestamp, unit: :second),
11
+ ])
12
+ end
13
+
14
+ it 'should encode/decode' do
15
+ subject.encoder wio, schema: schema, batch_size: 2 do |enc|
16
+ enc.encode(Feedx::TestCase::Model.new('X'))
17
+ enc.encode(Feedx::TestCase::Model.new('Y'))
18
+ enc.encode(Feedx::TestCase::Model.new('Z'))
19
+ end
20
+ expect(wio.string.bytesize).to be_within(100).of(1100)
21
+
22
+ subject.decoder rio do |dec|
23
+ expect(dec.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('X'))
24
+ expect(dec.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('Y'))
25
+ expect(dec.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('Z'))
26
+ expect(dec.decode(Feedx::TestCase::Model)).to be_nil
27
+ expect(dec).to be_eof
28
+ end
29
+ end
30
+ end
@@ -1,22 +1,23 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe Feedx::Format::Protobuf do
4
- subject { described_class.new(wio) }
5
4
  let(:wio) { StringIO.new }
5
+ let(:rio) { StringIO.open(wio.string) }
6
6
 
7
7
  it 'should encode/decode' do
8
- subject.encode(Feedx::TestCase::Model.new('X'))
9
- subject.encode(Feedx::TestCase::Model.new('Y'))
10
- subject.encode(Feedx::TestCase::Message.new(title: 'Z'))
8
+ subject.encoder wio do |enc|
9
+ enc.encode(Feedx::TestCase::Model.new('X'))
10
+ enc.encode(Feedx::TestCase::Model.new('Y'))
11
+ enc.encode(Feedx::TestCase::Message.new(title: 'Z'))
12
+ end
11
13
  expect(wio.string.bytes).to eq([3, 10, 1, 88] + [3, 10, 1, 89] + [3, 10, 1, 90])
12
14
 
13
- StringIO.open(wio.string) do |rio|
14
- fmt = described_class.new(rio)
15
- expect(fmt.decode(Feedx::TestCase::Message)).to eq(Feedx::TestCase::Message.new(title: 'X'))
16
- expect(fmt.decode(Feedx::TestCase::Message)).to eq(Feedx::TestCase::Message.new(title: 'Y'))
17
- expect(fmt.decode(Feedx::TestCase::Message)).to eq(Feedx::TestCase::Message.new(title: 'Z'))
18
- expect(fmt.decode(Feedx::TestCase::Message)).to be_nil
19
- expect(fmt).to be_eof
15
+ subject.decoder rio do |dec|
16
+ expect(dec.decode(Feedx::TestCase::Message)).to eq(Feedx::TestCase::Message.new(title: 'X'))
17
+ expect(dec.decode(Feedx::TestCase::Message)).to eq(Feedx::TestCase::Message.new(title: 'Y'))
18
+ expect(dec.decode(Feedx::TestCase::Message)).to eq(Feedx::TestCase::Message.new(title: 'Z'))
19
+ expect(dec.decode(Feedx::TestCase::Message)).to be_nil
20
+ expect(dec).to be_eof
20
21
  end
21
22
  end
22
23
  end
@@ -2,18 +2,18 @@ require 'spec_helper'
2
2
 
3
3
  RSpec.describe Feedx::Format do
4
4
  it 'should resolve' do
5
- expect(described_class.resolve(:json)).to eq(described_class::JSON)
6
- expect(described_class.resolve(:pb)).to eq(described_class::Protobuf)
5
+ expect(described_class.resolve(:json)).to be_instance_of(described_class::JSON)
6
+ expect(described_class.resolve(:pb)).to be_instance_of(described_class::Protobuf)
7
7
  expect { described_class.resolve(:txt) }.to raise_error(/invalid format txt/)
8
8
  end
9
9
 
10
10
  it 'should detect' do
11
- expect(described_class.detect('path/to/file.json')).to eq(described_class::JSON)
12
- expect(described_class.detect('path/to/file.jsonz')).to eq(described_class::JSON)
13
- expect(described_class.detect('path/to/file.json.gz')).to eq(described_class::JSON)
14
- expect(described_class.detect('path/to/file.pb')).to eq(described_class::Protobuf)
15
- expect(described_class.detect('path/to/file.pbz')).to eq(described_class::Protobuf)
16
- expect(described_class.detect('path/to/file.pb.z')).to eq(described_class::Protobuf)
11
+ expect(described_class.detect('path/to/file.json')).to be_instance_of(described_class::JSON)
12
+ expect(described_class.detect('path/to/file.jsonz')).to be_instance_of(described_class::JSON)
13
+ expect(described_class.detect('path/to/file.json.gz')).to be_instance_of(described_class::JSON)
14
+ expect(described_class.detect('path/to/file.pb')).to be_instance_of(described_class::Protobuf)
15
+ expect(described_class.detect('path/to/file.pbz')).to be_instance_of(described_class::Protobuf)
16
+ expect(described_class.detect('path/to/file.pb.z')).to be_instance_of(described_class::Protobuf)
17
17
  expect do
18
18
  described_class.detect('path/to/file.txt')
19
19
  end.to raise_error(/unable to detect format/)
@@ -64,4 +64,10 @@ RSpec.describe Feedx::Producer do
64
64
  size = described_class.perform 'mock:///dir/file.json', last_modified: Time.at(1515151516), enum: enumerable
65
65
  expect(size).to eq(15900)
66
66
  end
67
+
68
+ it 'should accept downstream options' do
69
+ expect do
70
+ described_class.perform 'mock:///dir/file.jsonz', enum: enumerable, x: 1, y: 'v', z: true
71
+ end.not_to raise_error
72
+ end
67
73
  end
@@ -2,10 +2,13 @@ require 'spec_helper'
2
2
 
3
3
  RSpec.describe Feedx::Stream do
4
4
  let(:bucket) { BFS::Bucket::InMem.new }
5
- before { allow(BFS).to receive(:resolve).and_return(bucket) }
5
+ let(:compressed) { described_class.new('mock:///dir/file.json.gz') }
6
6
 
7
7
  subject { described_class.new('mock:///dir/file.json') }
8
- let(:compressed) { described_class.new('mock:///dir/file.json.gz') }
8
+
9
+ before { allow(BFS).to receive(:resolve).and_return(bucket) }
10
+ after { subject.close }
11
+ after { compressed.close }
9
12
 
10
13
  it 'should reject invalid inputs' do
11
14
  expect do
@@ -13,13 +16,33 @@ RSpec.describe Feedx::Stream do
13
16
  end.to raise_error(/unable to detect format/)
14
17
  end
15
18
 
19
+ it 'should accept custom formats' do
20
+ format = Class.new do
21
+ def encoder(io, &block)
22
+ Feedx::Format::JSON::Encoder.open(io, &block)
23
+ end
24
+
25
+ def decoder(io, &block)
26
+ Feedx::Format::JSON::Decoder.open(io, &block)
27
+ end
28
+ end
29
+
30
+ described_class.open('mock:///dir/file.txt', format: format.new) do |stream|
31
+ stream.create {|s| s.encode Feedx::TestCase::Model.new('X') }
32
+ end
33
+
34
+ expect(bucket.read('dir/file.txt')).to eq(
35
+ %({"title":"X","updated_at":"2018-01-05 11:25:15 UTC"}\n),
36
+ )
37
+ end
38
+
16
39
  it 'should encode' do
17
40
  subject.create do |s|
18
41
  s.encode(Feedx::TestCase::Model.new('X'))
19
42
  s.encode(Feedx::TestCase::Model.new('Y'))
20
43
  end
21
44
 
22
- expect(bucket.open('dir/file.json').read).to eq(
45
+ expect(bucket.read('dir/file.json')).to eq(
23
46
  %({"title":"X","updated_at":"2018-01-05 11:25:15 UTC"}\n) +
24
47
  %({"title":"Y","updated_at":"2018-01-05 11:25:15 UTC"}\n),
25
48
  )
@@ -28,13 +28,29 @@ module Feedx
28
28
  end
29
29
  alias eql? ==
30
30
 
31
+ def updated_at
32
+ Time.at(1515151515).utc
33
+ end
34
+
31
35
  def from_json(data, *)
32
36
  hash = ::JSON.parse(data)
33
37
  @title = hash['title'] if hash.is_a?(Hash)
34
38
  end
35
39
 
36
40
  def to_json(*)
37
- ::JSON.dump(title: @title, updated_at: Time.at(1515151515).utc)
41
+ ::JSON.dump(title: @title, updated_at: updated_at)
42
+ end
43
+
44
+ def from_parquet(rec)
45
+ rec.each_pair do |name, value|
46
+ @title = value if name == 'title'
47
+ end
48
+ end
49
+
50
+ def to_parquet(schema, *)
51
+ schema.fields.map do |field|
52
+ send(field.name)
53
+ end
38
54
  end
39
55
  end
40
56
  end
@@ -56,7 +56,7 @@ var _ = Describe("Writer", func() {
56
56
 
57
57
  info, err := plain.Head(ctx)
58
58
  Expect(err).NotTo(HaveOccurred())
59
- Expect(info.Size).To(BeNumerically("~", 470, 10))
59
+ Expect(info.Size).To(BeNumerically("~", 370, 10))
60
60
  Expect(info.Metadata).To(Equal(bfs.Metadata{"X-Feedx-Last-Modified": "1515151515123"}))
61
61
 
62
62
  info, err = compressed.Head(ctx)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feedx
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.0
4
+ version: 0.12.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Black Square Media Ltd
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-01-20 00:00:00.000000000 Z
11
+ date: 2020-07-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bfs
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: red-parquet
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: rspec
71
85
  requirement: !ruby/object:Gem::Requirement
@@ -136,6 +150,8 @@ files:
136
150
  - format_test.go
137
151
  - go.mod
138
152
  - go.sum
153
+ - internal/testdata/testdata.pb.go
154
+ - internal/testdata/testdata.proto
139
155
  - lib/feedx.rb
140
156
  - lib/feedx/cache.rb
141
157
  - lib/feedx/cache/abstract.rb
@@ -149,6 +165,7 @@ files:
149
165
  - lib/feedx/format.rb
150
166
  - lib/feedx/format/abstract.rb
151
167
  - lib/feedx/format/json.rb
168
+ - lib/feedx/format/parquet.rb
152
169
  - lib/feedx/format/protobuf.rb
153
170
  - lib/feedx/producer.rb
154
171
  - lib/feedx/pusher.rb
@@ -165,6 +182,7 @@ files:
165
182
  - spec/feedx/consumer_spec.rb
166
183
  - spec/feedx/format/abstract_spec.rb
167
184
  - spec/feedx/format/json_spec.rb
185
+ - spec/feedx/format/parquet_spec.rb
168
186
  - spec/feedx/format/protobuf_spec.rb
169
187
  - spec/feedx/format_spec.rb
170
188
  - spec/feedx/producer_spec.rb
@@ -191,7 +209,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
191
209
  - !ruby/object:Gem::Version
192
210
  version: '0'
193
211
  requirements: []
194
- rubygems_version: 3.0.6
212
+ rubygems_version: 3.1.4
195
213
  signing_key:
196
214
  specification_version: 4
197
215
  summary: Exchange data between components via feeds
@@ -204,6 +222,7 @@ test_files:
204
222
  - spec/feedx/consumer_spec.rb
205
223
  - spec/feedx/format/abstract_spec.rb
206
224
  - spec/feedx/format/json_spec.rb
225
+ - spec/feedx/format/parquet_spec.rb
207
226
  - spec/feedx/format/protobuf_spec.rb
208
227
  - spec/feedx/format_spec.rb
209
228
  - spec/feedx/producer_spec.rb