feedx 0.10.2 → 0.11.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,16 +1,24 @@
1
1
  require 'pbio'
2
2
 
3
3
  class Feedx::Format::Protobuf < Feedx::Format::Abstract
4
- def initialize(io)
5
- super PBIO::Delimited.new(io)
6
- end
4
+ class Decoder < Feedx::Format::Abstract::Decoder
5
+ def initialize(io, **opts)
6
+ super PBIO::Delimited.new(io), **opts
7
+ end
7
8
 
8
- def decode(klass, **)
9
- @io.read(klass)
9
+ def decode(target, **)
10
+ @io.read(target)
11
+ end
10
12
  end
11
13
 
12
- def encode(msg, **opts)
13
- msg = msg.to_pb(**opts) if msg.respond_to?(:to_pb)
14
- @io.write msg
14
+ class Encoder < Feedx::Format::Abstract::Encoder
15
+ def initialize(io, **opts)
16
+ super PBIO::Delimited.new(io), **opts
17
+ end
18
+
19
+ def encode(msg, **opts)
20
+ msg = msg.to_pb(**opts) if msg.respond_to?(:to_pb)
21
+ @io.write msg
22
+ end
15
23
  end
16
24
  end
@@ -23,8 +23,9 @@ module Feedx
23
23
  def open(**opts)
24
24
  @blob.open(**opts) do |io|
25
25
  @compress.reader(io) do |cio|
26
- fmt = @format.new(cio)
27
- yield fmt
26
+ @format.decoder(cio) do |fmt|
27
+ yield fmt
28
+ end
28
29
  end
29
30
  end
30
31
  end
@@ -36,8 +37,9 @@ module Feedx
36
37
  def create(**opts)
37
38
  @blob.create(**opts) do |io|
38
39
  @compress.writer(io) do |cio|
39
- fmt = @format.new(cio)
40
- yield fmt
40
+ @format.encoder(cio) do |fmt|
41
+ yield fmt
42
+ end
41
43
  end
42
44
  end
43
45
  end
@@ -48,13 +50,10 @@ module Feedx
48
50
  case val
49
51
  when nil
50
52
  Feedx::Format.detect(@blob.path)
51
- when Class
52
- parent = Feedx::Format::Abstract
53
- raise ArgumentError, "Class #{val} must extend #{parent}" unless val < parent
54
-
55
- val
56
- else
53
+ when String, Symbol
57
54
  Feedx::Format.resolve(val)
55
+ else
56
+ Feedx::Format.validate!(val)
58
57
  end
59
58
  end
60
59
 
@@ -62,13 +61,10 @@ module Feedx
62
61
  case val
63
62
  when nil
64
63
  Feedx::Compression.detect(@blob.path)
65
- when Class
66
- parent = Feedx::Compression::Abstract
67
- raise ArgumentError, "Class #{val} must extend #{parent}" unless val < parent
68
-
69
- val
70
- else
64
+ when String, Symbol
71
65
  Feedx::Compression.resolve(val)
66
+ else
67
+ Feedx::Compression.validate!(val)
72
68
  end
73
69
  end
74
70
  end
@@ -23,8 +23,7 @@ var _ = Describe("Producer", func() {
23
23
  atomic.AddUint32(&numRuns, 1)
24
24
 
25
25
  for i := 0; i < 10; i++ {
26
- fix := fixture
27
- if err := w.Encode(&fix); err != nil {
26
+ if err := w.Encode(seed()); err != nil {
28
27
  return err
29
28
  }
30
29
  }
@@ -5,9 +5,9 @@ import (
5
5
  "io"
6
6
  "io/ioutil"
7
7
 
8
- "github.com/bsm/feedx"
9
-
10
8
  "github.com/bsm/bfs"
9
+ "github.com/bsm/feedx"
10
+ "github.com/bsm/feedx/internal/testdata"
11
11
  . "github.com/onsi/ginkgo"
12
12
  . "github.com/onsi/gomega"
13
13
  )
@@ -38,18 +38,18 @@ var _ = Describe("Reader", func() {
38
38
  })
39
39
 
40
40
  It("should decode", func() {
41
- var msgs []MockMessage
41
+ var msgs []*testdata.MockMessage
42
42
  for {
43
- var msg MockMessage
43
+ var msg testdata.MockMessage
44
44
  err := subject.Decode(&msg)
45
45
  if err == io.EOF {
46
46
  break
47
47
  }
48
48
  Expect(err).NotTo(HaveOccurred())
49
- msgs = append(msgs, msg)
49
+ msgs = append(msgs, &msg)
50
50
  }
51
51
 
52
- Expect(msgs).To(Equal([]MockMessage{fixture, fixture, fixture}))
52
+ Expect(msgs).To(ConsistOf(seed(), seed(), seed()))
53
53
  Expect(subject.NumRead()).To(Equal(3))
54
54
  })
55
55
  })
@@ -3,13 +3,13 @@ require 'spec_helper'
3
3
  RSpec.describe Feedx::Compression::Gzip do
4
4
  it 'should wrap readers/writers' do
5
5
  wio = StringIO.new
6
- described_class.writer(wio) {|w| w.write 'xyz' * 1000 }
6
+ subject.writer(wio) {|w| w.write 'xyz' * 1000 }
7
7
  expect(wio.size).to be_within(20).of(40)
8
8
  expect(wio.string.encoding).to eq(Encoding::BINARY)
9
9
 
10
10
  data = ''
11
11
  StringIO.open(wio.string) do |rio|
12
- described_class.reader(rio) {|z| data = z.read }
12
+ subject.reader(rio) {|z| data = z.read }
13
13
  end
14
14
  expect(data.size).to eq(3000)
15
15
  expect(data.encoding).to eq(Encoding.default_external)
@@ -3,12 +3,12 @@ require 'spec_helper'
3
3
  RSpec.describe Feedx::Compression::None do
4
4
  it 'should wrap readers/writers' do
5
5
  wio = StringIO.new
6
- described_class.writer(wio) {|w| w.write 'xyz' * 1000 }
6
+ subject.writer(wio) {|w| w.write 'xyz' * 1000 }
7
7
  expect(wio.size).to eq(3000)
8
8
 
9
9
  data = ''
10
10
  StringIO.open(wio.string) do |rio|
11
- described_class.reader(rio) {|z| data = z.read }
11
+ subject.reader(rio) {|z| data = z.read }
12
12
  end
13
13
  expect(data.size).to eq(3000)
14
14
  end
@@ -2,18 +2,18 @@ require 'spec_helper'
2
2
 
3
3
  RSpec.describe Feedx::Compression do
4
4
  it 'should resolve' do
5
- expect(described_class.resolve(:gzip)).to eq(described_class::Gzip)
6
- expect(described_class.resolve(:gz)).to eq(described_class::Gzip)
7
- expect(described_class.resolve(nil)).to eq(described_class::None)
5
+ expect(described_class.resolve(:gzip)).to be_instance_of(described_class::Gzip)
6
+ expect(described_class.resolve(:gz)).to be_instance_of(described_class::Gzip)
7
+ expect(described_class.resolve(nil)).to be_instance_of(described_class::None)
8
8
  expect { described_class.resolve(:txt) }.to raise_error(/invalid compression txt/)
9
9
  end
10
10
 
11
11
  it 'should detect' do
12
- expect(described_class.detect('path/to/file.jsonz')).to eq(described_class::Gzip)
13
- expect(described_class.detect('path/to/file.json.gz')).to eq(described_class::Gzip)
14
- expect(described_class.detect('path/to/file.json')).to eq(described_class::None)
15
- expect(described_class.detect('path/to/file.pbz')).to eq(described_class::Gzip)
16
- expect(described_class.detect('path/to/file.pb.gz')).to eq(described_class::Gzip)
17
- expect(described_class.detect('path/to/file.pb')).to eq(described_class::None)
12
+ expect(described_class.detect('path/to/file.jsonz')).to be_instance_of(described_class::Gzip)
13
+ expect(described_class.detect('path/to/file.json.gz')).to be_instance_of(described_class::Gzip)
14
+ expect(described_class.detect('path/to/file.json')).to be_instance_of(described_class::None)
15
+ expect(described_class.detect('path/to/file.pbz')).to be_instance_of(described_class::Gzip)
16
+ expect(described_class.detect('path/to/file.pb.gz')).to be_instance_of(described_class::Gzip)
17
+ expect(described_class.detect('path/to/file.pb')).to be_instance_of(described_class::None)
18
18
  end
19
19
  end
@@ -1,17 +1,20 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe Feedx::Format::Abstract do
4
- subject { Feedx::Format::JSON.new(wio) }
4
+ subject { Feedx::Format::JSON.new }
5
5
  let(:wio) { StringIO.new }
6
+ let(:rio) { StringIO.open(wio.string) }
6
7
 
7
8
  it 'should decode each' do
8
- subject.encode(Feedx::TestCase::Model.new('X'))
9
- subject.encode(Feedx::TestCase::Model.new('Y'))
10
- subject.encode(Feedx::TestCase::Message.new(title: 'Z'))
11
- StringIO.open(wio.string) do |rio|
12
- fmt = subject.class.new(rio)
13
- dec = fmt.decode_each(Feedx::TestCase::Model).to_a
14
- expect(dec.map(&:title)).to eq(%w[X Y Z])
9
+ subject.encoder wio do |enc|
10
+ enc.encode(Feedx::TestCase::Model.new('X'))
11
+ enc.encode(Feedx::TestCase::Model.new('Y'))
12
+ enc.encode(Feedx::TestCase::Message.new(title: 'Z'))
13
+ end
14
+
15
+ subject.decoder rio do |dec|
16
+ acc = dec.decode_each(Feedx::TestCase::Model).to_a
17
+ expect(acc.map(&:title)).to eq(%w[X Y Z])
15
18
  end
16
19
  end
17
20
  end
@@ -1,26 +1,27 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe Feedx::Format::JSON do
4
- subject { described_class.new(wio) }
5
4
  let(:wio) { StringIO.new }
5
+ let(:rio) { StringIO.open(wio.string) }
6
6
 
7
7
  it 'should encode/decode' do
8
- subject.encode(Feedx::TestCase::Model.new('X'))
9
- subject.encode(Feedx::TestCase::Model.new('Y'))
10
- subject.encode(Feedx::TestCase::Message.new(title: 'Z'))
8
+ subject.encoder wio do |enc|
9
+ enc.encode(Feedx::TestCase::Model.new('X'))
10
+ enc.encode(Feedx::TestCase::Model.new('Y'))
11
+ enc.encode(Feedx::TestCase::Message.new(title: 'Z'))
12
+ end
11
13
  expect(wio.string.lines).to eq [
12
14
  %({"title":"X","updated_at":"2018-01-05 11:25:15 UTC"}\n),
13
15
  %({"title":"Y","updated_at":"2018-01-05 11:25:15 UTC"}\n),
14
16
  %({"title":"Z"}\n),
15
17
  ]
16
18
 
17
- StringIO.open(wio.string) do |rio|
18
- fmt = described_class.new(rio)
19
- expect(fmt.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('X'))
20
- expect(fmt.decode(Feedx::TestCase::Model.new('O'))).to eq(Feedx::TestCase::Model.new('Y'))
21
- expect(fmt.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('Z'))
22
- expect(fmt.decode(Feedx::TestCase::Model)).to be_nil
23
- expect(fmt).to be_eof
19
+ subject.decoder rio do |dec|
20
+ expect(dec.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('X'))
21
+ expect(dec.decode(Feedx::TestCase::Model.new('O'))).to eq(Feedx::TestCase::Model.new('Y'))
22
+ expect(dec.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('Z'))
23
+ expect(dec.decode(Feedx::TestCase::Model)).to be_nil
24
+ expect(dec).to be_eof
24
25
  end
25
26
  end
26
27
  end
@@ -0,0 +1,30 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe Feedx::Format::Parquet do
4
+ let(:wio) { StringIO.new }
5
+ let(:rio) { StringIO.open(wio.string) }
6
+
7
+ let(:schema) do
8
+ Arrow::Schema.new([
9
+ Arrow::Field.new('title', :string),
10
+ Arrow::Field.new('updated_at', type: :timestamp, unit: :second),
11
+ ])
12
+ end
13
+
14
+ it 'should encode/decode' do
15
+ subject.encoder wio, schema: schema, batch_size: 2 do |enc|
16
+ enc.encode(Feedx::TestCase::Model.new('X'))
17
+ enc.encode(Feedx::TestCase::Model.new('Y'))
18
+ enc.encode(Feedx::TestCase::Model.new('Z'))
19
+ end
20
+ expect(wio.string.bytesize).to be_within(100).of(1100)
21
+
22
+ subject.decoder rio do |dec|
23
+ expect(dec.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('X'))
24
+ expect(dec.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('Y'))
25
+ expect(dec.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('Z'))
26
+ expect(dec.decode(Feedx::TestCase::Model)).to be_nil
27
+ expect(dec).to be_eof
28
+ end
29
+ end
30
+ end
@@ -1,22 +1,23 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe Feedx::Format::Protobuf do
4
- subject { described_class.new(wio) }
5
4
  let(:wio) { StringIO.new }
5
+ let(:rio) { StringIO.open(wio.string) }
6
6
 
7
7
  it 'should encode/decode' do
8
- subject.encode(Feedx::TestCase::Model.new('X'))
9
- subject.encode(Feedx::TestCase::Model.new('Y'))
10
- subject.encode(Feedx::TestCase::Message.new(title: 'Z'))
8
+ subject.encoder wio do |enc|
9
+ enc.encode(Feedx::TestCase::Model.new('X'))
10
+ enc.encode(Feedx::TestCase::Model.new('Y'))
11
+ enc.encode(Feedx::TestCase::Message.new(title: 'Z'))
12
+ end
11
13
  expect(wio.string.bytes).to eq([3, 10, 1, 88] + [3, 10, 1, 89] + [3, 10, 1, 90])
12
14
 
13
- StringIO.open(wio.string) do |rio|
14
- fmt = described_class.new(rio)
15
- expect(fmt.decode(Feedx::TestCase::Message)).to eq(Feedx::TestCase::Message.new(title: 'X'))
16
- expect(fmt.decode(Feedx::TestCase::Message)).to eq(Feedx::TestCase::Message.new(title: 'Y'))
17
- expect(fmt.decode(Feedx::TestCase::Message)).to eq(Feedx::TestCase::Message.new(title: 'Z'))
18
- expect(fmt.decode(Feedx::TestCase::Message)).to be_nil
19
- expect(fmt).to be_eof
15
+ subject.decoder rio do |dec|
16
+ expect(dec.decode(Feedx::TestCase::Message)).to eq(Feedx::TestCase::Message.new(title: 'X'))
17
+ expect(dec.decode(Feedx::TestCase::Message)).to eq(Feedx::TestCase::Message.new(title: 'Y'))
18
+ expect(dec.decode(Feedx::TestCase::Message)).to eq(Feedx::TestCase::Message.new(title: 'Z'))
19
+ expect(dec.decode(Feedx::TestCase::Message)).to be_nil
20
+ expect(dec).to be_eof
20
21
  end
21
22
  end
22
23
  end
@@ -2,18 +2,18 @@ require 'spec_helper'
2
2
 
3
3
  RSpec.describe Feedx::Format do
4
4
  it 'should resolve' do
5
- expect(described_class.resolve(:json)).to eq(described_class::JSON)
6
- expect(described_class.resolve(:pb)).to eq(described_class::Protobuf)
5
+ expect(described_class.resolve(:json)).to be_instance_of(described_class::JSON)
6
+ expect(described_class.resolve(:pb)).to be_instance_of(described_class::Protobuf)
7
7
  expect { described_class.resolve(:txt) }.to raise_error(/invalid format txt/)
8
8
  end
9
9
 
10
10
  it 'should detect' do
11
- expect(described_class.detect('path/to/file.json')).to eq(described_class::JSON)
12
- expect(described_class.detect('path/to/file.jsonz')).to eq(described_class::JSON)
13
- expect(described_class.detect('path/to/file.json.gz')).to eq(described_class::JSON)
14
- expect(described_class.detect('path/to/file.pb')).to eq(described_class::Protobuf)
15
- expect(described_class.detect('path/to/file.pbz')).to eq(described_class::Protobuf)
16
- expect(described_class.detect('path/to/file.pb.z')).to eq(described_class::Protobuf)
11
+ expect(described_class.detect('path/to/file.json')).to be_instance_of(described_class::JSON)
12
+ expect(described_class.detect('path/to/file.jsonz')).to be_instance_of(described_class::JSON)
13
+ expect(described_class.detect('path/to/file.json.gz')).to be_instance_of(described_class::JSON)
14
+ expect(described_class.detect('path/to/file.pb')).to be_instance_of(described_class::Protobuf)
15
+ expect(described_class.detect('path/to/file.pbz')).to be_instance_of(described_class::Protobuf)
16
+ expect(described_class.detect('path/to/file.pb.z')).to be_instance_of(described_class::Protobuf)
17
17
  expect do
18
18
  described_class.detect('path/to/file.txt')
19
19
  end.to raise_error(/unable to detect format/)
@@ -13,13 +13,32 @@ RSpec.describe Feedx::Stream do
13
13
  end.to raise_error(/unable to detect format/)
14
14
  end
15
15
 
16
+ it 'should accept custom formats' do
17
+ format = Class.new do
18
+ def encoder(io, &block)
19
+ Feedx::Format::JSON::Encoder.open(io, &block)
20
+ end
21
+
22
+ def decoder(io, &block)
23
+ Feedx::Format::JSON::Decoder.open(io, &block)
24
+ end
25
+ end
26
+
27
+ stream = described_class.new('mock:///dir/file.txt', format: format.new)
28
+ stream.create {|s| s.encode Feedx::TestCase::Model.new('X') }
29
+
30
+ expect(bucket.read('dir/file.txt')).to eq(
31
+ %({"title":"X","updated_at":"2018-01-05 11:25:15 UTC"}\n),
32
+ )
33
+ end
34
+
16
35
  it 'should encode' do
17
36
  subject.create do |s|
18
37
  s.encode(Feedx::TestCase::Model.new('X'))
19
38
  s.encode(Feedx::TestCase::Model.new('Y'))
20
39
  end
21
40
 
22
- expect(bucket.open('dir/file.json').read).to eq(
41
+ expect(bucket.read('dir/file.json')).to eq(
23
42
  %({"title":"X","updated_at":"2018-01-05 11:25:15 UTC"}\n) +
24
43
  %({"title":"Y","updated_at":"2018-01-05 11:25:15 UTC"}\n),
25
44
  )
@@ -28,13 +28,29 @@ module Feedx
28
28
  end
29
29
  alias eql? ==
30
30
 
31
+ def updated_at
32
+ Time.at(1515151515).utc
33
+ end
34
+
31
35
  def from_json(data, *)
32
36
  hash = ::JSON.parse(data)
33
37
  @title = hash['title'] if hash.is_a?(Hash)
34
38
  end
35
39
 
36
40
  def to_json(*)
37
- ::JSON.dump(title: @title, updated_at: Time.at(1515151515).utc)
41
+ ::JSON.dump(title: @title, updated_at: updated_at)
42
+ end
43
+
44
+ def from_parquet(rec)
45
+ rec.each_pair do |name, value|
46
+ @title = value if name == 'title'
47
+ end
48
+ end
49
+
50
+ def to_parquet(schema, *)
51
+ schema.fields.map do |field|
52
+ send(field.name)
53
+ end
38
54
  end
39
55
  end
40
56
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feedx
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.2
4
+ version: 0.11.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Black Square Media Ltd
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-05-28 00:00:00.000000000 Z
11
+ date: 2020-06-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bfs
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: red-parquet
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: rspec
71
85
  requirement: !ruby/object:Gem::Requirement
@@ -136,6 +150,8 @@ files:
136
150
  - format_test.go
137
151
  - go.mod
138
152
  - go.sum
153
+ - internal/testdata/testdata.pb.go
154
+ - internal/testdata/testdata.proto
139
155
  - lib/feedx.rb
140
156
  - lib/feedx/cache.rb
141
157
  - lib/feedx/cache/abstract.rb
@@ -149,6 +165,7 @@ files:
149
165
  - lib/feedx/format.rb
150
166
  - lib/feedx/format/abstract.rb
151
167
  - lib/feedx/format/json.rb
168
+ - lib/feedx/format/parquet.rb
152
169
  - lib/feedx/format/protobuf.rb
153
170
  - lib/feedx/producer.rb
154
171
  - lib/feedx/pusher.rb
@@ -165,6 +182,7 @@ files:
165
182
  - spec/feedx/consumer_spec.rb
166
183
  - spec/feedx/format/abstract_spec.rb
167
184
  - spec/feedx/format/json_spec.rb
185
+ - spec/feedx/format/parquet_spec.rb
168
186
  - spec/feedx/format/protobuf_spec.rb
169
187
  - spec/feedx/format_spec.rb
170
188
  - spec/feedx/producer_spec.rb
@@ -191,7 +209,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
191
209
  - !ruby/object:Gem::Version
192
210
  version: '0'
193
211
  requirements: []
194
- rubygems_version: 3.1.2
212
+ rubygems_version: 3.1.4
195
213
  signing_key:
196
214
  specification_version: 4
197
215
  summary: Exchange data between components via feeds
@@ -204,6 +222,7 @@ test_files:
204
222
  - spec/feedx/consumer_spec.rb
205
223
  - spec/feedx/format/abstract_spec.rb
206
224
  - spec/feedx/format/json_spec.rb
225
+ - spec/feedx/format/parquet_spec.rb
207
226
  - spec/feedx/format/protobuf_spec.rb
208
227
  - spec/feedx/format_spec.rb
209
228
  - spec/feedx/producer_spec.rb