feedx 0.9.0 → 0.11.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,16 +1,24 @@
1
1
  require 'pbio'
2
2
 
3
3
  class Feedx::Format::Protobuf < Feedx::Format::Abstract
4
- def initialize(io)
5
- super PBIO::Delimited.new(io)
6
- end
4
+ class Decoder < Feedx::Format::Abstract::Decoder
5
+ def initialize(io, **opts)
6
+ super PBIO::Delimited.new(io), **opts
7
+ end
7
8
 
8
- def decode(klass, **)
9
- @io.read(klass)
9
+ def decode(target, **)
10
+ @io.read(target)
11
+ end
10
12
  end
11
13
 
12
- def encode(msg, **opts)
13
- msg = msg.to_pb(**opts) if msg.respond_to?(:to_pb)
14
- @io.write msg
14
+ class Encoder < Feedx::Format::Abstract::Encoder
15
+ def initialize(io, **opts)
16
+ super PBIO::Delimited.new(io), **opts
17
+ end
18
+
19
+ def encode(msg, **opts)
20
+ msg = msg.to_pb(**opts) if msg.respond_to?(:to_pb)
21
+ @io.write msg
22
+ end
15
23
  end
16
24
  end
@@ -6,8 +6,8 @@ module Feedx
6
6
  # Produces a relation as an encoded feed to a remote location.
7
7
  class Producer
8
8
  # See constructor.
9
- def self.perform(url, opts={}, &block)
10
- new(url, opts, &block).perform
9
+ def self.perform(url, **opts, &block)
10
+ new(url, **opts, &block).perform
11
11
  end
12
12
 
13
13
  # @param [String] url the destination URL.
@@ -19,11 +19,11 @@ module Feedx
19
19
  # @option opts [Time,Proc] :last_modified the last modified time, used to determine if a push is necessary.
20
20
  # @yield A block factory to generate the relation or enumerator.
21
21
  # @yieldreturn [Enumerable,ActiveRecord::Relation] the relation or enumerator to stream.
22
- def initialize(url, opts={}, &block)
22
+ def initialize(url, **opts, &block)
23
23
  @enum = opts[:enum] || block
24
24
  raise ArgumentError, "#{self.class.name}.new expects an :enum option or a block factory" unless @enum
25
25
 
26
- @stream = Feedx::Stream.new(url, opts)
26
+ @stream = Feedx::Stream.new(url, **opts)
27
27
  @last_mod = opts[:last_modified]
28
28
  @fmt_opts = opts[:format_options] || {}
29
29
  end
@@ -37,7 +37,8 @@ module Feedx
37
37
  metadata = @stream.blob.info.metadata
38
38
  remote_rev = (metadata[META_LAST_MODIFIED] || metadata[META_LAST_MODIFIED_DC]).to_i
39
39
  return -1 unless local_rev > remote_rev
40
- rescue BFS::FileNotFound # rubocop:disable Lint/HandleExceptions
40
+ rescue BFS::FileNotFound
41
+ nil
41
42
  end if local_rev.positive?
42
43
 
43
44
  @stream.create metadata: { META_LAST_MODIFIED => local_rev.to_s } do |fmt|
@@ -10,7 +10,7 @@ module Feedx
10
10
  # @param [Hash] opts options
11
11
  # @option opts [Symbol,Class<Feedx::Format::Abstract>] :format custom formatter. Default: from file extension.
12
12
  # @option opts [Symbol,Class<Feedx::Compression::Abstract>] :compress enable compression. Default: from file extension.
13
- def initialize(url, opts={})
13
+ def initialize(url, **opts)
14
14
  @blob = BFS::Blob.new(url)
15
15
  @format = detect_format(opts[:format])
16
16
  @compress = detect_compress(opts[:compress])
@@ -20,11 +20,12 @@ module Feedx
20
20
  # @param [Hash] opts BFS::Blob#open options
21
21
  # @yield A block over a formatted stream.
22
22
  # @yieldparam [Feedx::Format::Abstract] formatted input stream.
23
- def open(opts={})
24
- @blob.open(opts) do |io|
23
+ def open(**opts)
24
+ @blob.open(**opts) do |io|
25
25
  @compress.reader(io) do |cio|
26
- fmt = @format.new(cio)
27
- yield fmt
26
+ @format.decoder(cio) do |fmt|
27
+ yield fmt
28
+ end
28
29
  end
29
30
  end
30
31
  end
@@ -33,11 +34,12 @@ module Feedx
33
34
  # @param [Hash] opts BFS::Blob#create options
34
35
  # @yield A block over a formatted stream.
35
36
  # @yieldparam [Feedx::Format::Abstract] formatted output stream.
36
- def create(opts={})
37
- @blob.create(opts) do |io|
37
+ def create(**opts)
38
+ @blob.create(**opts) do |io|
38
39
  @compress.writer(io) do |cio|
39
- fmt = @format.new(cio)
40
- yield fmt
40
+ @format.encoder(cio) do |fmt|
41
+ yield fmt
42
+ end
41
43
  end
42
44
  end
43
45
  end
@@ -48,13 +50,10 @@ module Feedx
48
50
  case val
49
51
  when nil
50
52
  Feedx::Format.detect(@blob.path)
51
- when Class
52
- parent = Feedx::Format::Abstract
53
- raise ArgumentError, "Class #{val} must extend #{parent}" unless val < parent
54
-
55
- val
56
- else
53
+ when String, Symbol
57
54
  Feedx::Format.resolve(val)
55
+ else
56
+ Feedx::Format.validate!(val)
58
57
  end
59
58
  end
60
59
 
@@ -62,13 +61,10 @@ module Feedx
62
61
  case val
63
62
  when nil
64
63
  Feedx::Compression.detect(@blob.path)
65
- when Class
66
- parent = Feedx::Compression::Abstract
67
- raise ArgumentError, "Class #{val} must extend #{parent}" unless val < parent
68
-
69
- val
70
- else
64
+ when String, Symbol
71
65
  Feedx::Compression.resolve(val)
66
+ else
67
+ Feedx::Compression.validate!(val)
72
68
  end
73
69
  end
74
70
  end
@@ -140,21 +140,14 @@ func (p *Producer) push() (*ProducerPush, error) {
140
140
  wopt.LastMod = modTime
141
141
  }
142
142
 
143
- // retrieve original last modified time
144
- lastMod, err := remoteLastModified(p.ctx, p.remote)
145
- if err != nil {
143
+ // retrieve original last modified time, skip if not modified
144
+ if rts, err := remoteLastModified(p.ctx, p.remote); err != nil {
146
145
  return nil, err
147
- }
148
-
149
- // skip push if not modified
150
- if lastMod.Time().Equal(wopt.LastMod) {
146
+ } else if rts == timestampFromTime(wopt.LastMod) {
151
147
  return &ProducerPush{Producer: p}, nil
152
148
  }
153
149
 
154
- writer, err := NewWriter(p.ctx, p.remote, &wopt)
155
- if err != nil {
156
- return nil, err
157
- }
150
+ writer := NewWriter(p.ctx, p.remote, &wopt)
158
151
  defer writer.Discard()
159
152
 
160
153
  if err := p.pfn(writer); err != nil {
@@ -2,6 +2,7 @@ package feedx_test
2
2
 
3
3
  import (
4
4
  "context"
5
+ "sync/atomic"
5
6
  "time"
6
7
 
7
8
  "github.com/bsm/bfs"
@@ -13,14 +14,16 @@ import (
13
14
  var _ = Describe("Producer", func() {
14
15
  var subject *feedx.Producer
15
16
  var obj *bfs.Object
17
+ var numRuns uint32
16
18
  var ctx = context.Background()
17
19
 
18
20
  setup := func(o *feedx.ProducerOptions) {
19
21
  var err error
20
22
  subject, err = feedx.NewProducerForRemote(ctx, obj, o, func(w *feedx.Writer) error {
23
+ atomic.AddUint32(&numRuns, 1)
24
+
21
25
  for i := 0; i < 10; i++ {
22
- fix := fixture
23
- if err := w.Encode(&fix); err != nil {
26
+ if err := w.Encode(seed()); err != nil {
24
27
  return err
25
28
  }
26
29
  }
@@ -30,6 +33,7 @@ var _ = Describe("Producer", func() {
30
33
  }
31
34
 
32
35
  BeforeEach(func() {
36
+ atomic.StoreUint32(&numRuns, 0)
33
37
  obj = bfs.NewInMemObject("path/to/file.jsonz")
34
38
  })
35
39
 
@@ -54,16 +58,22 @@ var _ = Describe("Producer", func() {
54
58
 
55
59
  It("should produce with custom last-mod check", func() {
56
60
  setup(&feedx.ProducerOptions{
57
- LastModCheck: func(_ context.Context) (time.Time, error) { return time.Unix(1515151515, 0), nil },
61
+ Interval: 50 * time.Millisecond,
62
+ LastModCheck: func(_ context.Context) (time.Time, error) { return time.Unix(1515151515, 987654321), nil },
58
63
  })
59
64
 
60
- Expect(subject.LastPush()).To(BeTemporally("~", time.Now(), time.Second))
61
- Expect(subject.LastModified()).To(Equal(time.Unix(1515151515, 0)))
65
+ firstPush := subject.LastPush()
66
+ Expect(firstPush).To(BeTemporally("~", time.Now(), time.Second))
67
+ Expect(subject.LastModified()).To(Equal(time.Unix(1515151515, 987000000)))
62
68
  Expect(subject.NumWritten()).To(Equal(10))
69
+ Expect(atomic.LoadUint32(&numRuns)).To(Equal(uint32(1)))
63
70
 
64
71
  info, err := obj.Head(ctx)
65
72
  Expect(err).NotTo(HaveOccurred())
66
73
  Expect(info.Size).To(BeNumerically("~", 75, 10))
67
- Expect(info.Metadata).To(HaveKeyWithValue("X-Feedx-Last-Modified", "1515151515000"))
74
+ Expect(info.Metadata).To(HaveKeyWithValue("X-Feedx-Last-Modified", "1515151515987"))
75
+
76
+ Eventually(func() bool { return subject.LastPush().After(firstPush) }).Should(BeTrue())
77
+ Expect(atomic.LoadUint32(&numRuns)).To(Equal(uint32(1)))
68
78
  })
69
79
  })
@@ -5,10 +5,9 @@ import (
5
5
  "io"
6
6
  "io/ioutil"
7
7
 
8
- "github.com/bsm/feedx"
9
-
10
8
  "github.com/bsm/bfs"
11
- tbp "github.com/golang/protobuf/proto/proto3_proto"
9
+ "github.com/bsm/feedx"
10
+ "github.com/bsm/feedx/internal/testdata"
12
11
  . "github.com/onsi/ginkgo"
13
12
  . "github.com/onsi/gomega"
14
13
  )
@@ -34,23 +33,23 @@ var _ = Describe("Reader", func() {
34
33
  It("should read", func() {
35
34
  data, err := ioutil.ReadAll(subject)
36
35
  Expect(err).NotTo(HaveOccurred())
37
- Expect(len(data)).To(BeNumerically("~", 140, 20))
36
+ Expect(len(data)).To(BeNumerically("~", 110, 20))
38
37
  Expect(subject.NumRead()).To(Equal(0))
39
38
  })
40
39
 
41
40
  It("should decode", func() {
42
- var msgs []tbp.Message
41
+ var msgs []*testdata.MockMessage
43
42
  for {
44
- var msg tbp.Message
43
+ var msg testdata.MockMessage
45
44
  err := subject.Decode(&msg)
46
45
  if err == io.EOF {
47
46
  break
48
47
  }
49
48
  Expect(err).NotTo(HaveOccurred())
50
- msgs = append(msgs, msg)
49
+ msgs = append(msgs, &msg)
51
50
  }
52
51
 
53
- Expect(msgs).To(Equal([]tbp.Message{fixture, fixture, fixture}))
52
+ Expect(msgs).To(ConsistOf(seed(), seed(), seed()))
54
53
  Expect(subject.NumRead()).To(Equal(3))
55
54
  })
56
55
  })
@@ -3,13 +3,15 @@ require 'spec_helper'
3
3
  RSpec.describe Feedx::Compression::Gzip do
4
4
  it 'should wrap readers/writers' do
5
5
  wio = StringIO.new
6
- described_class.writer(wio) {|w| w.write 'xyz' * 1000 }
6
+ subject.writer(wio) {|w| w.write 'xyz' * 1000 }
7
7
  expect(wio.size).to be_within(20).of(40)
8
+ expect(wio.string.encoding).to eq(Encoding::BINARY)
8
9
 
9
10
  data = ''
10
11
  StringIO.open(wio.string) do |rio|
11
- described_class.reader(rio) {|z| data = z.read }
12
+ subject.reader(rio) {|z| data = z.read }
12
13
  end
13
14
  expect(data.size).to eq(3000)
15
+ expect(data.encoding).to eq(Encoding.default_external)
14
16
  end
15
17
  end
@@ -3,12 +3,12 @@ require 'spec_helper'
3
3
  RSpec.describe Feedx::Compression::None do
4
4
  it 'should wrap readers/writers' do
5
5
  wio = StringIO.new
6
- described_class.writer(wio) {|w| w.write 'xyz' * 1000 }
6
+ subject.writer(wio) {|w| w.write 'xyz' * 1000 }
7
7
  expect(wio.size).to eq(3000)
8
8
 
9
9
  data = ''
10
10
  StringIO.open(wio.string) do |rio|
11
- described_class.reader(rio) {|z| data = z.read }
11
+ subject.reader(rio) {|z| data = z.read }
12
12
  end
13
13
  expect(data.size).to eq(3000)
14
14
  end
@@ -2,18 +2,18 @@ require 'spec_helper'
2
2
 
3
3
  RSpec.describe Feedx::Compression do
4
4
  it 'should resolve' do
5
- expect(described_class.resolve(:gzip)).to eq(described_class::Gzip)
6
- expect(described_class.resolve(:gz)).to eq(described_class::Gzip)
7
- expect(described_class.resolve(nil)).to eq(described_class::None)
5
+ expect(described_class.resolve(:gzip)).to be_instance_of(described_class::Gzip)
6
+ expect(described_class.resolve(:gz)).to be_instance_of(described_class::Gzip)
7
+ expect(described_class.resolve(nil)).to be_instance_of(described_class::None)
8
8
  expect { described_class.resolve(:txt) }.to raise_error(/invalid compression txt/)
9
9
  end
10
10
 
11
11
  it 'should detect' do
12
- expect(described_class.detect('path/to/file.jsonz')).to eq(described_class::Gzip)
13
- expect(described_class.detect('path/to/file.json.gz')).to eq(described_class::Gzip)
14
- expect(described_class.detect('path/to/file.json')).to eq(described_class::None)
15
- expect(described_class.detect('path/to/file.pbz')).to eq(described_class::Gzip)
16
- expect(described_class.detect('path/to/file.pb.gz')).to eq(described_class::Gzip)
17
- expect(described_class.detect('path/to/file.pb')).to eq(described_class::None)
12
+ expect(described_class.detect('path/to/file.jsonz')).to be_instance_of(described_class::Gzip)
13
+ expect(described_class.detect('path/to/file.json.gz')).to be_instance_of(described_class::Gzip)
14
+ expect(described_class.detect('path/to/file.json')).to be_instance_of(described_class::None)
15
+ expect(described_class.detect('path/to/file.pbz')).to be_instance_of(described_class::Gzip)
16
+ expect(described_class.detect('path/to/file.pb.gz')).to be_instance_of(described_class::Gzip)
17
+ expect(described_class.detect('path/to/file.pb')).to be_instance_of(described_class::None)
18
18
  end
19
19
  end
@@ -36,10 +36,13 @@ RSpec.describe Feedx::Consumer do
36
36
 
37
37
  private
38
38
 
39
- def mock_produce!(opts={})
39
+ def mock_produce!(enum: mock_enum, **opts)
40
40
  url = 'mock:///dir/file.json'
41
- opts[:enum] ||= %w[x y z].map {|t| Feedx::TestCase::Model.new(t) } * 100
42
- Feedx::Producer.perform url, opts
41
+ Feedx::Producer.perform url, enum: enum, **opts
43
42
  url
44
43
  end
44
+
45
+ def mock_enum
46
+ %w[x y z].map {|t| Feedx::TestCase::Model.new(t) } * 100
47
+ end
45
48
  end
@@ -1,17 +1,20 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe Feedx::Format::Abstract do
4
- subject { Feedx::Format::JSON.new(wio) }
4
+ subject { Feedx::Format::JSON.new }
5
5
  let(:wio) { StringIO.new }
6
+ let(:rio) { StringIO.open(wio.string) }
6
7
 
7
8
  it 'should decode each' do
8
- subject.encode(Feedx::TestCase::Model.new('X'))
9
- subject.encode(Feedx::TestCase::Model.new('Y'))
10
- subject.encode(Feedx::TestCase::Message.new(title: 'Z'))
11
- StringIO.open(wio.string) do |rio|
12
- fmt = subject.class.new(rio)
13
- dec = fmt.decode_each(Feedx::TestCase::Model).to_a
14
- expect(dec.map(&:title)).to eq(%w[X Y Z])
9
+ subject.encoder wio do |enc|
10
+ enc.encode(Feedx::TestCase::Model.new('X'))
11
+ enc.encode(Feedx::TestCase::Model.new('Y'))
12
+ enc.encode(Feedx::TestCase::Message.new(title: 'Z'))
13
+ end
14
+
15
+ subject.decoder rio do |dec|
16
+ acc = dec.decode_each(Feedx::TestCase::Model).to_a
17
+ expect(acc.map(&:title)).to eq(%w[X Y Z])
15
18
  end
16
19
  end
17
20
  end
@@ -1,26 +1,27 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe Feedx::Format::JSON do
4
- subject { described_class.new(wio) }
5
4
  let(:wio) { StringIO.new }
5
+ let(:rio) { StringIO.open(wio.string) }
6
6
 
7
7
  it 'should encode/decode' do
8
- subject.encode(Feedx::TestCase::Model.new('X'))
9
- subject.encode(Feedx::TestCase::Model.new('Y'))
10
- subject.encode(Feedx::TestCase::Message.new(title: 'Z'))
8
+ subject.encoder wio do |enc|
9
+ enc.encode(Feedx::TestCase::Model.new('X'))
10
+ enc.encode(Feedx::TestCase::Model.new('Y'))
11
+ enc.encode(Feedx::TestCase::Message.new(title: 'Z'))
12
+ end
11
13
  expect(wio.string.lines).to eq [
12
14
  %({"title":"X","updated_at":"2018-01-05 11:25:15 UTC"}\n),
13
15
  %({"title":"Y","updated_at":"2018-01-05 11:25:15 UTC"}\n),
14
16
  %({"title":"Z"}\n),
15
17
  ]
16
18
 
17
- StringIO.open(wio.string) do |rio|
18
- fmt = described_class.new(rio)
19
- expect(fmt.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('X'))
20
- expect(fmt.decode(Feedx::TestCase::Model.new('O'))).to eq(Feedx::TestCase::Model.new('Y'))
21
- expect(fmt.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('Z'))
22
- expect(fmt.decode(Feedx::TestCase::Model)).to be_nil
23
- expect(fmt).to be_eof
19
+ subject.decoder rio do |dec|
20
+ expect(dec.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('X'))
21
+ expect(dec.decode(Feedx::TestCase::Model.new('O'))).to eq(Feedx::TestCase::Model.new('Y'))
22
+ expect(dec.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('Z'))
23
+ expect(dec.decode(Feedx::TestCase::Model)).to be_nil
24
+ expect(dec).to be_eof
24
25
  end
25
26
  end
26
27
  end
@@ -0,0 +1,30 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe Feedx::Format::Parquet do
4
+ let(:wio) { StringIO.new }
5
+ let(:rio) { StringIO.open(wio.string) }
6
+
7
+ let(:schema) do
8
+ Arrow::Schema.new([
9
+ Arrow::Field.new('title', :string),
10
+ Arrow::Field.new('updated_at', type: :timestamp, unit: :second),
11
+ ])
12
+ end
13
+
14
+ it 'should encode/decode' do
15
+ subject.encoder wio, schema: schema, batch_size: 2 do |enc|
16
+ enc.encode(Feedx::TestCase::Model.new('X'))
17
+ enc.encode(Feedx::TestCase::Model.new('Y'))
18
+ enc.encode(Feedx::TestCase::Model.new('Z'))
19
+ end
20
+ expect(wio.string.bytesize).to be_within(100).of(1100)
21
+
22
+ subject.decoder rio do |dec|
23
+ expect(dec.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('X'))
24
+ expect(dec.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('Y'))
25
+ expect(dec.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('Z'))
26
+ expect(dec.decode(Feedx::TestCase::Model)).to be_nil
27
+ expect(dec).to be_eof
28
+ end
29
+ end
30
+ end