feedx 0.10.2 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,16 +1,24 @@
1
1
  require 'pbio'
2
2
 
3
3
  class Feedx::Format::Protobuf < Feedx::Format::Abstract
4
- def initialize(io)
5
- super PBIO::Delimited.new(io)
6
- end
4
+ class Decoder < Feedx::Format::Abstract::Decoder
5
+ def initialize(io, **opts)
6
+ super PBIO::Delimited.new(io), **opts
7
+ end
7
8
 
8
- def decode(klass, **)
9
- @io.read(klass)
9
+ def decode(target, **)
10
+ @io.read(target)
11
+ end
10
12
  end
11
13
 
12
- def encode(msg, **opts)
13
- msg = msg.to_pb(**opts) if msg.respond_to?(:to_pb)
14
- @io.write msg
14
+ class Encoder < Feedx::Format::Abstract::Encoder
15
+ def initialize(io, **opts)
16
+ super PBIO::Delimited.new(io), **opts
17
+ end
18
+
19
+ def encode(msg, **opts)
20
+ msg = msg.to_pb(**opts) if msg.respond_to?(:to_pb)
21
+ @io.write msg
22
+ end
15
23
  end
16
24
  end
@@ -23,8 +23,9 @@ module Feedx
23
23
  def open(**opts)
24
24
  @blob.open(**opts) do |io|
25
25
  @compress.reader(io) do |cio|
26
- fmt = @format.new(cio)
27
- yield fmt
26
+ @format.decoder(cio) do |fmt|
27
+ yield fmt
28
+ end
28
29
  end
29
30
  end
30
31
  end
@@ -36,8 +37,9 @@ module Feedx
36
37
  def create(**opts)
37
38
  @blob.create(**opts) do |io|
38
39
  @compress.writer(io) do |cio|
39
- fmt = @format.new(cio)
40
- yield fmt
40
+ @format.encoder(cio) do |fmt|
41
+ yield fmt
42
+ end
41
43
  end
42
44
  end
43
45
  end
@@ -48,13 +50,10 @@ module Feedx
48
50
  case val
49
51
  when nil
50
52
  Feedx::Format.detect(@blob.path)
51
- when Class
52
- parent = Feedx::Format::Abstract
53
- raise ArgumentError, "Class #{val} must extend #{parent}" unless val < parent
54
-
55
- val
56
- else
53
+ when String, Symbol
57
54
  Feedx::Format.resolve(val)
55
+ else
56
+ Feedx::Format.validate!(val)
58
57
  end
59
58
  end
60
59
 
@@ -62,13 +61,10 @@ module Feedx
62
61
  case val
63
62
  when nil
64
63
  Feedx::Compression.detect(@blob.path)
65
- when Class
66
- parent = Feedx::Compression::Abstract
67
- raise ArgumentError, "Class #{val} must extend #{parent}" unless val < parent
68
-
69
- val
70
- else
64
+ when String, Symbol
71
65
  Feedx::Compression.resolve(val)
66
+ else
67
+ Feedx::Compression.validate!(val)
72
68
  end
73
69
  end
74
70
  end
@@ -23,8 +23,7 @@ var _ = Describe("Producer", func() {
23
23
  atomic.AddUint32(&numRuns, 1)
24
24
 
25
25
  for i := 0; i < 10; i++ {
26
- fix := fixture
27
- if err := w.Encode(&fix); err != nil {
26
+ if err := w.Encode(seed()); err != nil {
28
27
  return err
29
28
  }
30
29
  }
@@ -5,9 +5,9 @@ import (
5
5
  "io"
6
6
  "io/ioutil"
7
7
 
8
- "github.com/bsm/feedx"
9
-
10
8
  "github.com/bsm/bfs"
9
+ "github.com/bsm/feedx"
10
+ "github.com/bsm/feedx/internal/testdata"
11
11
  . "github.com/onsi/ginkgo"
12
12
  . "github.com/onsi/gomega"
13
13
  )
@@ -38,18 +38,18 @@ var _ = Describe("Reader", func() {
38
38
  })
39
39
 
40
40
  It("should decode", func() {
41
- var msgs []MockMessage
41
+ var msgs []*testdata.MockMessage
42
42
  for {
43
- var msg MockMessage
43
+ var msg testdata.MockMessage
44
44
  err := subject.Decode(&msg)
45
45
  if err == io.EOF {
46
46
  break
47
47
  }
48
48
  Expect(err).NotTo(HaveOccurred())
49
- msgs = append(msgs, msg)
49
+ msgs = append(msgs, &msg)
50
50
  }
51
51
 
52
- Expect(msgs).To(Equal([]MockMessage{fixture, fixture, fixture}))
52
+ Expect(msgs).To(ConsistOf(seed(), seed(), seed()))
53
53
  Expect(subject.NumRead()).To(Equal(3))
54
54
  })
55
55
  })
@@ -3,13 +3,13 @@ require 'spec_helper'
3
3
  RSpec.describe Feedx::Compression::Gzip do
4
4
  it 'should wrap readers/writers' do
5
5
  wio = StringIO.new
6
- described_class.writer(wio) {|w| w.write 'xyz' * 1000 }
6
+ subject.writer(wio) {|w| w.write 'xyz' * 1000 }
7
7
  expect(wio.size).to be_within(20).of(40)
8
8
  expect(wio.string.encoding).to eq(Encoding::BINARY)
9
9
 
10
10
  data = ''
11
11
  StringIO.open(wio.string) do |rio|
12
- described_class.reader(rio) {|z| data = z.read }
12
+ subject.reader(rio) {|z| data = z.read }
13
13
  end
14
14
  expect(data.size).to eq(3000)
15
15
  expect(data.encoding).to eq(Encoding.default_external)
@@ -3,12 +3,12 @@ require 'spec_helper'
3
3
  RSpec.describe Feedx::Compression::None do
4
4
  it 'should wrap readers/writers' do
5
5
  wio = StringIO.new
6
- described_class.writer(wio) {|w| w.write 'xyz' * 1000 }
6
+ subject.writer(wio) {|w| w.write 'xyz' * 1000 }
7
7
  expect(wio.size).to eq(3000)
8
8
 
9
9
  data = ''
10
10
  StringIO.open(wio.string) do |rio|
11
- described_class.reader(rio) {|z| data = z.read }
11
+ subject.reader(rio) {|z| data = z.read }
12
12
  end
13
13
  expect(data.size).to eq(3000)
14
14
  end
@@ -2,18 +2,18 @@ require 'spec_helper'
2
2
 
3
3
  RSpec.describe Feedx::Compression do
4
4
  it 'should resolve' do
5
- expect(described_class.resolve(:gzip)).to eq(described_class::Gzip)
6
- expect(described_class.resolve(:gz)).to eq(described_class::Gzip)
7
- expect(described_class.resolve(nil)).to eq(described_class::None)
5
+ expect(described_class.resolve(:gzip)).to be_instance_of(described_class::Gzip)
6
+ expect(described_class.resolve(:gz)).to be_instance_of(described_class::Gzip)
7
+ expect(described_class.resolve(nil)).to be_instance_of(described_class::None)
8
8
  expect { described_class.resolve(:txt) }.to raise_error(/invalid compression txt/)
9
9
  end
10
10
 
11
11
  it 'should detect' do
12
- expect(described_class.detect('path/to/file.jsonz')).to eq(described_class::Gzip)
13
- expect(described_class.detect('path/to/file.json.gz')).to eq(described_class::Gzip)
14
- expect(described_class.detect('path/to/file.json')).to eq(described_class::None)
15
- expect(described_class.detect('path/to/file.pbz')).to eq(described_class::Gzip)
16
- expect(described_class.detect('path/to/file.pb.gz')).to eq(described_class::Gzip)
17
- expect(described_class.detect('path/to/file.pb')).to eq(described_class::None)
12
+ expect(described_class.detect('path/to/file.jsonz')).to be_instance_of(described_class::Gzip)
13
+ expect(described_class.detect('path/to/file.json.gz')).to be_instance_of(described_class::Gzip)
14
+ expect(described_class.detect('path/to/file.json')).to be_instance_of(described_class::None)
15
+ expect(described_class.detect('path/to/file.pbz')).to be_instance_of(described_class::Gzip)
16
+ expect(described_class.detect('path/to/file.pb.gz')).to be_instance_of(described_class::Gzip)
17
+ expect(described_class.detect('path/to/file.pb')).to be_instance_of(described_class::None)
18
18
  end
19
19
  end
@@ -1,17 +1,20 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe Feedx::Format::Abstract do
4
- subject { Feedx::Format::JSON.new(wio) }
4
+ subject { Feedx::Format::JSON.new }
5
5
  let(:wio) { StringIO.new }
6
+ let(:rio) { StringIO.open(wio.string) }
6
7
 
7
8
  it 'should decode each' do
8
- subject.encode(Feedx::TestCase::Model.new('X'))
9
- subject.encode(Feedx::TestCase::Model.new('Y'))
10
- subject.encode(Feedx::TestCase::Message.new(title: 'Z'))
11
- StringIO.open(wio.string) do |rio|
12
- fmt = subject.class.new(rio)
13
- dec = fmt.decode_each(Feedx::TestCase::Model).to_a
14
- expect(dec.map(&:title)).to eq(%w[X Y Z])
9
+ subject.encoder wio do |enc|
10
+ enc.encode(Feedx::TestCase::Model.new('X'))
11
+ enc.encode(Feedx::TestCase::Model.new('Y'))
12
+ enc.encode(Feedx::TestCase::Message.new(title: 'Z'))
13
+ end
14
+
15
+ subject.decoder rio do |dec|
16
+ acc = dec.decode_each(Feedx::TestCase::Model).to_a
17
+ expect(acc.map(&:title)).to eq(%w[X Y Z])
15
18
  end
16
19
  end
17
20
  end
@@ -1,26 +1,27 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe Feedx::Format::JSON do
4
- subject { described_class.new(wio) }
5
4
  let(:wio) { StringIO.new }
5
+ let(:rio) { StringIO.open(wio.string) }
6
6
 
7
7
  it 'should encode/decode' do
8
- subject.encode(Feedx::TestCase::Model.new('X'))
9
- subject.encode(Feedx::TestCase::Model.new('Y'))
10
- subject.encode(Feedx::TestCase::Message.new(title: 'Z'))
8
+ subject.encoder wio do |enc|
9
+ enc.encode(Feedx::TestCase::Model.new('X'))
10
+ enc.encode(Feedx::TestCase::Model.new('Y'))
11
+ enc.encode(Feedx::TestCase::Message.new(title: 'Z'))
12
+ end
11
13
  expect(wio.string.lines).to eq [
12
14
  %({"title":"X","updated_at":"2018-01-05 11:25:15 UTC"}\n),
13
15
  %({"title":"Y","updated_at":"2018-01-05 11:25:15 UTC"}\n),
14
16
  %({"title":"Z"}\n),
15
17
  ]
16
18
 
17
- StringIO.open(wio.string) do |rio|
18
- fmt = described_class.new(rio)
19
- expect(fmt.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('X'))
20
- expect(fmt.decode(Feedx::TestCase::Model.new('O'))).to eq(Feedx::TestCase::Model.new('Y'))
21
- expect(fmt.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('Z'))
22
- expect(fmt.decode(Feedx::TestCase::Model)).to be_nil
23
- expect(fmt).to be_eof
19
+ subject.decoder rio do |dec|
20
+ expect(dec.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('X'))
21
+ expect(dec.decode(Feedx::TestCase::Model.new('O'))).to eq(Feedx::TestCase::Model.new('Y'))
22
+ expect(dec.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('Z'))
23
+ expect(dec.decode(Feedx::TestCase::Model)).to be_nil
24
+ expect(dec).to be_eof
24
25
  end
25
26
  end
26
27
  end
@@ -0,0 +1,30 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe Feedx::Format::Parquet do
4
+ let(:wio) { StringIO.new }
5
+ let(:rio) { StringIO.open(wio.string) }
6
+
7
+ let(:schema) do
8
+ Arrow::Schema.new([
9
+ Arrow::Field.new('title', :string),
10
+ Arrow::Field.new('updated_at', type: :timestamp, unit: :second),
11
+ ])
12
+ end
13
+
14
+ it 'should encode/decode' do
15
+ subject.encoder wio, schema: schema, batch_size: 2 do |enc|
16
+ enc.encode(Feedx::TestCase::Model.new('X'))
17
+ enc.encode(Feedx::TestCase::Model.new('Y'))
18
+ enc.encode(Feedx::TestCase::Model.new('Z'))
19
+ end
20
+ expect(wio.string.bytesize).to be_within(100).of(1100)
21
+
22
+ subject.decoder rio do |dec|
23
+ expect(dec.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('X'))
24
+ expect(dec.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('Y'))
25
+ expect(dec.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('Z'))
26
+ expect(dec.decode(Feedx::TestCase::Model)).to be_nil
27
+ expect(dec).to be_eof
28
+ end
29
+ end
30
+ end
@@ -1,22 +1,23 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe Feedx::Format::Protobuf do
4
- subject { described_class.new(wio) }
5
4
  let(:wio) { StringIO.new }
5
+ let(:rio) { StringIO.open(wio.string) }
6
6
 
7
7
  it 'should encode/decode' do
8
- subject.encode(Feedx::TestCase::Model.new('X'))
9
- subject.encode(Feedx::TestCase::Model.new('Y'))
10
- subject.encode(Feedx::TestCase::Message.new(title: 'Z'))
8
+ subject.encoder wio do |enc|
9
+ enc.encode(Feedx::TestCase::Model.new('X'))
10
+ enc.encode(Feedx::TestCase::Model.new('Y'))
11
+ enc.encode(Feedx::TestCase::Message.new(title: 'Z'))
12
+ end
11
13
  expect(wio.string.bytes).to eq([3, 10, 1, 88] + [3, 10, 1, 89] + [3, 10, 1, 90])
12
14
 
13
- StringIO.open(wio.string) do |rio|
14
- fmt = described_class.new(rio)
15
- expect(fmt.decode(Feedx::TestCase::Message)).to eq(Feedx::TestCase::Message.new(title: 'X'))
16
- expect(fmt.decode(Feedx::TestCase::Message)).to eq(Feedx::TestCase::Message.new(title: 'Y'))
17
- expect(fmt.decode(Feedx::TestCase::Message)).to eq(Feedx::TestCase::Message.new(title: 'Z'))
18
- expect(fmt.decode(Feedx::TestCase::Message)).to be_nil
19
- expect(fmt).to be_eof
15
+ subject.decoder rio do |dec|
16
+ expect(dec.decode(Feedx::TestCase::Message)).to eq(Feedx::TestCase::Message.new(title: 'X'))
17
+ expect(dec.decode(Feedx::TestCase::Message)).to eq(Feedx::TestCase::Message.new(title: 'Y'))
18
+ expect(dec.decode(Feedx::TestCase::Message)).to eq(Feedx::TestCase::Message.new(title: 'Z'))
19
+ expect(dec.decode(Feedx::TestCase::Message)).to be_nil
20
+ expect(dec).to be_eof
20
21
  end
21
22
  end
22
23
  end
@@ -2,18 +2,18 @@ require 'spec_helper'
2
2
 
3
3
  RSpec.describe Feedx::Format do
4
4
  it 'should resolve' do
5
- expect(described_class.resolve(:json)).to eq(described_class::JSON)
6
- expect(described_class.resolve(:pb)).to eq(described_class::Protobuf)
5
+ expect(described_class.resolve(:json)).to be_instance_of(described_class::JSON)
6
+ expect(described_class.resolve(:pb)).to be_instance_of(described_class::Protobuf)
7
7
  expect { described_class.resolve(:txt) }.to raise_error(/invalid format txt/)
8
8
  end
9
9
 
10
10
  it 'should detect' do
11
- expect(described_class.detect('path/to/file.json')).to eq(described_class::JSON)
12
- expect(described_class.detect('path/to/file.jsonz')).to eq(described_class::JSON)
13
- expect(described_class.detect('path/to/file.json.gz')).to eq(described_class::JSON)
14
- expect(described_class.detect('path/to/file.pb')).to eq(described_class::Protobuf)
15
- expect(described_class.detect('path/to/file.pbz')).to eq(described_class::Protobuf)
16
- expect(described_class.detect('path/to/file.pb.z')).to eq(described_class::Protobuf)
11
+ expect(described_class.detect('path/to/file.json')).to be_instance_of(described_class::JSON)
12
+ expect(described_class.detect('path/to/file.jsonz')).to be_instance_of(described_class::JSON)
13
+ expect(described_class.detect('path/to/file.json.gz')).to be_instance_of(described_class::JSON)
14
+ expect(described_class.detect('path/to/file.pb')).to be_instance_of(described_class::Protobuf)
15
+ expect(described_class.detect('path/to/file.pbz')).to be_instance_of(described_class::Protobuf)
16
+ expect(described_class.detect('path/to/file.pb.z')).to be_instance_of(described_class::Protobuf)
17
17
  expect do
18
18
  described_class.detect('path/to/file.txt')
19
19
  end.to raise_error(/unable to detect format/)
@@ -13,13 +13,32 @@ RSpec.describe Feedx::Stream do
13
13
  end.to raise_error(/unable to detect format/)
14
14
  end
15
15
 
16
+ it 'should accept custom formats' do
17
+ format = Class.new do
18
+ def encoder(io, &block)
19
+ Feedx::Format::JSON::Encoder.open(io, &block)
20
+ end
21
+
22
+ def decoder(io, &block)
23
+ Feedx::Format::JSON::Decoder.open(io, &block)
24
+ end
25
+ end
26
+
27
+ stream = described_class.new('mock:///dir/file.txt', format: format.new)
28
+ stream.create {|s| s.encode Feedx::TestCase::Model.new('X') }
29
+
30
+ expect(bucket.read('dir/file.txt')).to eq(
31
+ %({"title":"X","updated_at":"2018-01-05 11:25:15 UTC"}\n),
32
+ )
33
+ end
34
+
16
35
  it 'should encode' do
17
36
  subject.create do |s|
18
37
  s.encode(Feedx::TestCase::Model.new('X'))
19
38
  s.encode(Feedx::TestCase::Model.new('Y'))
20
39
  end
21
40
 
22
- expect(bucket.open('dir/file.json').read).to eq(
41
+ expect(bucket.read('dir/file.json')).to eq(
23
42
  %({"title":"X","updated_at":"2018-01-05 11:25:15 UTC"}\n) +
24
43
  %({"title":"Y","updated_at":"2018-01-05 11:25:15 UTC"}\n),
25
44
  )
@@ -28,13 +28,29 @@ module Feedx
28
28
  end
29
29
  alias eql? ==
30
30
 
31
+ def updated_at
32
+ Time.at(1515151515).utc
33
+ end
34
+
31
35
  def from_json(data, *)
32
36
  hash = ::JSON.parse(data)
33
37
  @title = hash['title'] if hash.is_a?(Hash)
34
38
  end
35
39
 
36
40
  def to_json(*)
37
- ::JSON.dump(title: @title, updated_at: Time.at(1515151515).utc)
41
+ ::JSON.dump(title: @title, updated_at: updated_at)
42
+ end
43
+
44
+ def from_parquet(rec)
45
+ rec.each_pair do |name, value|
46
+ @title = value if name == 'title'
47
+ end
48
+ end
49
+
50
+ def to_parquet(schema, *)
51
+ schema.fields.map do |field|
52
+ send(field.name)
53
+ end
38
54
  end
39
55
  end
40
56
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feedx
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.2
4
+ version: 0.11.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Black Square Media Ltd
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-05-28 00:00:00.000000000 Z
11
+ date: 2020-06-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bfs
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: red-parquet
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: rspec
71
85
  requirement: !ruby/object:Gem::Requirement
@@ -136,6 +150,8 @@ files:
136
150
  - format_test.go
137
151
  - go.mod
138
152
  - go.sum
153
+ - internal/testdata/testdata.pb.go
154
+ - internal/testdata/testdata.proto
139
155
  - lib/feedx.rb
140
156
  - lib/feedx/cache.rb
141
157
  - lib/feedx/cache/abstract.rb
@@ -149,6 +165,7 @@ files:
149
165
  - lib/feedx/format.rb
150
166
  - lib/feedx/format/abstract.rb
151
167
  - lib/feedx/format/json.rb
168
+ - lib/feedx/format/parquet.rb
152
169
  - lib/feedx/format/protobuf.rb
153
170
  - lib/feedx/producer.rb
154
171
  - lib/feedx/pusher.rb
@@ -165,6 +182,7 @@ files:
165
182
  - spec/feedx/consumer_spec.rb
166
183
  - spec/feedx/format/abstract_spec.rb
167
184
  - spec/feedx/format/json_spec.rb
185
+ - spec/feedx/format/parquet_spec.rb
168
186
  - spec/feedx/format/protobuf_spec.rb
169
187
  - spec/feedx/format_spec.rb
170
188
  - spec/feedx/producer_spec.rb
@@ -191,7 +209,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
191
209
  - !ruby/object:Gem::Version
192
210
  version: '0'
193
211
  requirements: []
194
- rubygems_version: 3.1.2
212
+ rubygems_version: 3.1.4
195
213
  signing_key:
196
214
  specification_version: 4
197
215
  summary: Exchange data between components via feeds
@@ -204,6 +222,7 @@ test_files:
204
222
  - spec/feedx/consumer_spec.rb
205
223
  - spec/feedx/format/abstract_spec.rb
206
224
  - spec/feedx/format/json_spec.rb
225
+ - spec/feedx/format/parquet_spec.rb
207
226
  - spec/feedx/format/protobuf_spec.rb
208
227
  - spec/feedx/format_spec.rb
209
228
  - spec/feedx/producer_spec.rb