feedx 0.10.1 → 0.12.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (43)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +2 -0
  3. data/.travis.yml +13 -6
  4. data/Gemfile.lock +42 -19
  5. data/Makefile +5 -0
  6. data/consumer_test.go +5 -5
  7. data/feedx.gemspec +3 -2
  8. data/feedx_test.go +12 -9
  9. data/format.go +16 -16
  10. data/format_test.go +6 -7
  11. data/go.mod +5 -10
  12. data/go.sum +43 -24
  13. data/internal/testdata/testdata.pb.go +124 -0
  14. data/internal/testdata/testdata.proto +15 -0
  15. data/lib/feedx/cache/abstract.rb +2 -2
  16. data/lib/feedx/compression.rb +11 -4
  17. data/lib/feedx/compression/abstract.rb +2 -2
  18. data/lib/feedx/compression/gzip.rb +14 -16
  19. data/lib/feedx/compression/none.rb +4 -4
  20. data/lib/feedx/consumer.rb +17 -11
  21. data/lib/feedx/format.rb +18 -9
  22. data/lib/feedx/format/abstract.rb +42 -13
  23. data/lib/feedx/format/json.rb +12 -8
  24. data/lib/feedx/format/parquet.rb +102 -0
  25. data/lib/feedx/format/protobuf.rb +16 -8
  26. data/lib/feedx/producer.rb +20 -14
  27. data/lib/feedx/stream.rb +42 -25
  28. data/producer_test.go +1 -2
  29. data/reader_test.go +7 -8
  30. data/spec/feedx/compression/gzip_spec.rb +2 -2
  31. data/spec/feedx/compression/none_spec.rb +2 -2
  32. data/spec/feedx/compression_spec.rb +9 -9
  33. data/spec/feedx/consumer_spec.rb +6 -3
  34. data/spec/feedx/format/abstract_spec.rb +11 -8
  35. data/spec/feedx/format/json_spec.rb +12 -11
  36. data/spec/feedx/format/parquet_spec.rb +30 -0
  37. data/spec/feedx/format/protobuf_spec.rb +12 -11
  38. data/spec/feedx/format_spec.rb +8 -8
  39. data/spec/feedx/producer_spec.rb +6 -0
  40. data/spec/feedx/stream_spec.rb +28 -3
  41. data/spec/spec_helper.rb +17 -1
  42. data/writer_test.go +1 -1
  43. metadata +22 -3
@@ -1,26 +1,27 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe Feedx::Format::JSON do
4
- subject { described_class.new(wio) }
5
4
  let(:wio) { StringIO.new }
5
+ let(:rio) { StringIO.open(wio.string) }
6
6
 
7
7
  it 'should encode/decode' do
8
- subject.encode(Feedx::TestCase::Model.new('X'))
9
- subject.encode(Feedx::TestCase::Model.new('Y'))
10
- subject.encode(Feedx::TestCase::Message.new(title: 'Z'))
8
+ subject.encoder wio do |enc|
9
+ enc.encode(Feedx::TestCase::Model.new('X'))
10
+ enc.encode(Feedx::TestCase::Model.new('Y'))
11
+ enc.encode(Feedx::TestCase::Message.new(title: 'Z'))
12
+ end
11
13
  expect(wio.string.lines).to eq [
12
14
  %({"title":"X","updated_at":"2018-01-05 11:25:15 UTC"}\n),
13
15
  %({"title":"Y","updated_at":"2018-01-05 11:25:15 UTC"}\n),
14
16
  %({"title":"Z"}\n),
15
17
  ]
16
18
 
17
- StringIO.open(wio.string) do |rio|
18
- fmt = described_class.new(rio)
19
- expect(fmt.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('X'))
20
- expect(fmt.decode(Feedx::TestCase::Model.new('O'))).to eq(Feedx::TestCase::Model.new('Y'))
21
- expect(fmt.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('Z'))
22
- expect(fmt.decode(Feedx::TestCase::Model)).to be_nil
23
- expect(fmt).to be_eof
19
+ subject.decoder rio do |dec|
20
+ expect(dec.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('X'))
21
+ expect(dec.decode(Feedx::TestCase::Model.new('O'))).to eq(Feedx::TestCase::Model.new('Y'))
22
+ expect(dec.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('Z'))
23
+ expect(dec.decode(Feedx::TestCase::Model)).to be_nil
24
+ expect(dec).to be_eof
24
25
  end
25
26
  end
26
27
  end
@@ -0,0 +1,30 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe Feedx::Format::Parquet do
4
+ let(:wio) { StringIO.new }
5
+ let(:rio) { StringIO.open(wio.string) }
6
+
7
+ let(:schema) do
8
+ Arrow::Schema.new([
9
+ Arrow::Field.new('title', :string),
10
+ Arrow::Field.new('updated_at', type: :timestamp, unit: :second),
11
+ ])
12
+ end
13
+
14
+ it 'should encode/decode' do
15
+ subject.encoder wio, schema: schema, batch_size: 2 do |enc|
16
+ enc.encode(Feedx::TestCase::Model.new('X'))
17
+ enc.encode(Feedx::TestCase::Model.new('Y'))
18
+ enc.encode(Feedx::TestCase::Model.new('Z'))
19
+ end
20
+ expect(wio.string.bytesize).to be_within(100).of(1100)
21
+
22
+ subject.decoder rio do |dec|
23
+ expect(dec.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('X'))
24
+ expect(dec.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('Y'))
25
+ expect(dec.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('Z'))
26
+ expect(dec.decode(Feedx::TestCase::Model)).to be_nil
27
+ expect(dec).to be_eof
28
+ end
29
+ end
30
+ end
@@ -1,22 +1,23 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe Feedx::Format::Protobuf do
4
- subject { described_class.new(wio) }
5
4
  let(:wio) { StringIO.new }
5
+ let(:rio) { StringIO.open(wio.string) }
6
6
 
7
7
  it 'should encode/decode' do
8
- subject.encode(Feedx::TestCase::Model.new('X'))
9
- subject.encode(Feedx::TestCase::Model.new('Y'))
10
- subject.encode(Feedx::TestCase::Message.new(title: 'Z'))
8
+ subject.encoder wio do |enc|
9
+ enc.encode(Feedx::TestCase::Model.new('X'))
10
+ enc.encode(Feedx::TestCase::Model.new('Y'))
11
+ enc.encode(Feedx::TestCase::Message.new(title: 'Z'))
12
+ end
11
13
  expect(wio.string.bytes).to eq([3, 10, 1, 88] + [3, 10, 1, 89] + [3, 10, 1, 90])
12
14
 
13
- StringIO.open(wio.string) do |rio|
14
- fmt = described_class.new(rio)
15
- expect(fmt.decode(Feedx::TestCase::Message)).to eq(Feedx::TestCase::Message.new(title: 'X'))
16
- expect(fmt.decode(Feedx::TestCase::Message)).to eq(Feedx::TestCase::Message.new(title: 'Y'))
17
- expect(fmt.decode(Feedx::TestCase::Message)).to eq(Feedx::TestCase::Message.new(title: 'Z'))
18
- expect(fmt.decode(Feedx::TestCase::Message)).to be_nil
19
- expect(fmt).to be_eof
15
+ subject.decoder rio do |dec|
16
+ expect(dec.decode(Feedx::TestCase::Message)).to eq(Feedx::TestCase::Message.new(title: 'X'))
17
+ expect(dec.decode(Feedx::TestCase::Message)).to eq(Feedx::TestCase::Message.new(title: 'Y'))
18
+ expect(dec.decode(Feedx::TestCase::Message)).to eq(Feedx::TestCase::Message.new(title: 'Z'))
19
+ expect(dec.decode(Feedx::TestCase::Message)).to be_nil
20
+ expect(dec).to be_eof
20
21
  end
21
22
  end
22
23
  end
@@ -2,18 +2,18 @@ require 'spec_helper'
2
2
 
3
3
  RSpec.describe Feedx::Format do
4
4
  it 'should resolve' do
5
- expect(described_class.resolve(:json)).to eq(described_class::JSON)
6
- expect(described_class.resolve(:pb)).to eq(described_class::Protobuf)
5
+ expect(described_class.resolve(:json)).to be_instance_of(described_class::JSON)
6
+ expect(described_class.resolve(:pb)).to be_instance_of(described_class::Protobuf)
7
7
  expect { described_class.resolve(:txt) }.to raise_error(/invalid format txt/)
8
8
  end
9
9
 
10
10
  it 'should detect' do
11
- expect(described_class.detect('path/to/file.json')).to eq(described_class::JSON)
12
- expect(described_class.detect('path/to/file.jsonz')).to eq(described_class::JSON)
13
- expect(described_class.detect('path/to/file.json.gz')).to eq(described_class::JSON)
14
- expect(described_class.detect('path/to/file.pb')).to eq(described_class::Protobuf)
15
- expect(described_class.detect('path/to/file.pbz')).to eq(described_class::Protobuf)
16
- expect(described_class.detect('path/to/file.pb.z')).to eq(described_class::Protobuf)
11
+ expect(described_class.detect('path/to/file.json')).to be_instance_of(described_class::JSON)
12
+ expect(described_class.detect('path/to/file.jsonz')).to be_instance_of(described_class::JSON)
13
+ expect(described_class.detect('path/to/file.json.gz')).to be_instance_of(described_class::JSON)
14
+ expect(described_class.detect('path/to/file.pb')).to be_instance_of(described_class::Protobuf)
15
+ expect(described_class.detect('path/to/file.pbz')).to be_instance_of(described_class::Protobuf)
16
+ expect(described_class.detect('path/to/file.pb.z')).to be_instance_of(described_class::Protobuf)
17
17
  expect do
18
18
  described_class.detect('path/to/file.txt')
19
19
  end.to raise_error(/unable to detect format/)
@@ -64,4 +64,10 @@ RSpec.describe Feedx::Producer do
64
64
  size = described_class.perform 'mock:///dir/file.json', last_modified: Time.at(1515151516), enum: enumerable
65
65
  expect(size).to eq(15900)
66
66
  end
67
+
68
+ it 'should accept downstream options' do
69
+ expect do
70
+ described_class.perform 'mock:///dir/file.jsonz', enum: enumerable, x: 1, y: 'v', z: true
71
+ end.not_to raise_error
72
+ end
67
73
  end
@@ -2,10 +2,13 @@ require 'spec_helper'
2
2
 
3
3
  RSpec.describe Feedx::Stream do
4
4
  let(:bucket) { BFS::Bucket::InMem.new }
5
- before { allow(BFS).to receive(:resolve).and_return(bucket) }
5
+ let(:compressed) { described_class.new('mock:///dir/file.json.gz') }
6
6
 
7
7
  subject { described_class.new('mock:///dir/file.json') }
8
- let(:compressed) { described_class.new('mock:///dir/file.json.gz') }
8
+
9
+ before { allow(BFS).to receive(:resolve).and_return(bucket) }
10
+ after { subject.close }
11
+ after { compressed.close }
9
12
 
10
13
  it 'should reject invalid inputs' do
11
14
  expect do
@@ -13,13 +16,35 @@ RSpec.describe Feedx::Stream do
13
16
  end.to raise_error(/unable to detect format/)
14
17
  end
15
18
 
19
+ it 'should accept custom formats' do
20
+ format = Class.new do
21
+ def encoder(io, &block)
22
+ Feedx::Format::JSON::Encoder.open(io, &block)
23
+ end
24
+
25
+ def decoder(io, &block)
26
+ Feedx::Format::JSON::Decoder.open(io, &block)
27
+ end
28
+ end
29
+
30
+ result = described_class.open('mock:///dir/file.txt', format: format.new) do |stream|
31
+ stream.create {|s| s.encode Feedx::TestCase::Model.new('X') }
32
+ 21
33
+ end
34
+ expect(result).to eq(21)
35
+
36
+ expect(bucket.read('dir/file.txt')).to eq(
37
+ %({"title":"X","updated_at":"2018-01-05 11:25:15 UTC"}\n),
38
+ )
39
+ end
40
+
16
41
  it 'should encode' do
17
42
  subject.create do |s|
18
43
  s.encode(Feedx::TestCase::Model.new('X'))
19
44
  s.encode(Feedx::TestCase::Model.new('Y'))
20
45
  end
21
46
 
22
- expect(bucket.open('dir/file.json').read).to eq(
47
+ expect(bucket.read('dir/file.json')).to eq(
23
48
  %({"title":"X","updated_at":"2018-01-05 11:25:15 UTC"}\n) +
24
49
  %({"title":"Y","updated_at":"2018-01-05 11:25:15 UTC"}\n),
25
50
  )
@@ -28,13 +28,29 @@ module Feedx
28
28
  end
29
29
  alias eql? ==
30
30
 
31
+ def updated_at
32
+ Time.at(1515151515).utc
33
+ end
34
+
31
35
  def from_json(data, *)
32
36
  hash = ::JSON.parse(data)
33
37
  @title = hash['title'] if hash.is_a?(Hash)
34
38
  end
35
39
 
36
40
  def to_json(*)
37
- ::JSON.dump(title: @title, updated_at: Time.at(1515151515).utc)
41
+ ::JSON.dump(title: @title, updated_at: updated_at)
42
+ end
43
+
44
+ def from_parquet(rec)
45
+ rec.each_pair do |name, value|
46
+ @title = value if name == 'title'
47
+ end
48
+ end
49
+
50
+ def to_parquet(schema, *)
51
+ schema.fields.map do |field|
52
+ send(field.name)
53
+ end
38
54
  end
39
55
  end
40
56
  end
@@ -56,7 +56,7 @@ var _ = Describe("Writer", func() {
56
56
 
57
57
  info, err := plain.Head(ctx)
58
58
  Expect(err).NotTo(HaveOccurred())
59
- Expect(info.Size).To(BeNumerically("~", 470, 10))
59
+ Expect(info.Size).To(BeNumerically("~", 370, 10))
60
60
  Expect(info.Metadata).To(Equal(bfs.Metadata{"X-Feedx-Last-Modified": "1515151515123"}))
61
61
 
62
62
  info, err = compressed.Head(ctx)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feedx
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.1
4
+ version: 0.12.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Black Square Media Ltd
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-01-22 00:00:00.000000000 Z
11
+ date: 2020-07-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bfs
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: red-parquet
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: rspec
71
85
  requirement: !ruby/object:Gem::Requirement
@@ -136,6 +150,8 @@ files:
136
150
  - format_test.go
137
151
  - go.mod
138
152
  - go.sum
153
+ - internal/testdata/testdata.pb.go
154
+ - internal/testdata/testdata.proto
139
155
  - lib/feedx.rb
140
156
  - lib/feedx/cache.rb
141
157
  - lib/feedx/cache/abstract.rb
@@ -149,6 +165,7 @@ files:
149
165
  - lib/feedx/format.rb
150
166
  - lib/feedx/format/abstract.rb
151
167
  - lib/feedx/format/json.rb
168
+ - lib/feedx/format/parquet.rb
152
169
  - lib/feedx/format/protobuf.rb
153
170
  - lib/feedx/producer.rb
154
171
  - lib/feedx/pusher.rb
@@ -165,6 +182,7 @@ files:
165
182
  - spec/feedx/consumer_spec.rb
166
183
  - spec/feedx/format/abstract_spec.rb
167
184
  - spec/feedx/format/json_spec.rb
185
+ - spec/feedx/format/parquet_spec.rb
168
186
  - spec/feedx/format/protobuf_spec.rb
169
187
  - spec/feedx/format_spec.rb
170
188
  - spec/feedx/producer_spec.rb
@@ -191,7 +209,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
191
209
  - !ruby/object:Gem::Version
192
210
  version: '0'
193
211
  requirements: []
194
- rubygems_version: 3.0.3
212
+ rubygems_version: 3.1.4
195
213
  signing_key:
196
214
  specification_version: 4
197
215
  summary: Exchange data between components via feeds
@@ -204,6 +222,7 @@ test_files:
204
222
  - spec/feedx/consumer_spec.rb
205
223
  - spec/feedx/format/abstract_spec.rb
206
224
  - spec/feedx/format/json_spec.rb
225
+ - spec/feedx/format/parquet_spec.rb
207
226
  - spec/feedx/format/protobuf_spec.rb
208
227
  - spec/feedx/format_spec.rb
209
228
  - spec/feedx/producer_spec.rb