feedx 0.9.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,16 +1,24 @@
 require 'pbio'
 
 class Feedx::Format::Protobuf < Feedx::Format::Abstract
-  def initialize(io)
-    super PBIO::Delimited.new(io)
-  end
+  class Decoder < Feedx::Format::Abstract::Decoder
+    def initialize(io, **opts)
+      super PBIO::Delimited.new(io), **opts
+    end
 
-  def decode(klass, **)
-    @io.read(klass)
+    def decode(target, **)
+      @io.read(target)
+    end
   end
 
-  def encode(msg, **opts)
-    msg = msg.to_pb(**opts) if msg.respond_to?(:to_pb)
-    @io.write msg
+  class Encoder < Feedx::Format::Abstract::Encoder
+    def initialize(io, **opts)
+      super PBIO::Delimited.new(io), **opts
+    end
+
+    def encode(msg, **opts)
+      msg = msg.to_pb(**opts) if msg.respond_to?(:to_pb)
+      @io.write msg
+    end
   end
 end
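
Note: the Protobuf formatter is now split into nested Decoder/Encoder classes that follow the new Feedx::Format::Abstract API, so a format instance no longer wraps an IO itself. A minimal sketch of the new usage, assuming MyProto::Record is a placeholder PBIO-compatible message class and records.pb is an illustrative path:

    format = Feedx::Format::Protobuf.new

    # encode a delimited stream of messages
    File.open('records.pb', 'wb') do |io|
      format.encoder(io) do |enc|
        enc.encode(MyProto::Record.new(title: 'X'))
      end
    end

    # read them back
    File.open('records.pb', 'rb') do |io|
      format.decoder(io) do |dec|
        puts dec.decode(MyProto::Record)
      end
    end
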
@@ -6,8 +6,8 @@ module Feedx
   # Produces a relation as an encoded feed to a remote location.
   class Producer
     # See constructor.
-    def self.perform(url, opts={}, &block)
-      new(url, opts, &block).perform
+    def self.perform(url, **opts, &block)
+      new(url, **opts, &block).perform
     end
 
     # @param [String] url the destination URL.
@@ -19,11 +19,11 @@ module Feedx
     # @option opts [Time,Proc] :last_modified the last modified time, used to determine if a push is necessary.
     # @yield A block factory to generate the relation or enumerator.
     # @yieldreturn [Enumerable,ActiveRecord::Relation] the relation or enumerator to stream.
-    def initialize(url, opts={}, &block)
+    def initialize(url, **opts, &block)
       @enum = opts[:enum] || block
       raise ArgumentError, "#{self.class.name}.new expects an :enum option or a block factory" unless @enum
 
-      @stream = Feedx::Stream.new(url, opts)
+      @stream = Feedx::Stream.new(url, **opts)
       @last_mod = opts[:last_modified]
       @fmt_opts = opts[:format_options] || {}
     end
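
Note: Feedx::Producer.perform and #initialize now take keyword arguments instead of a positional options hash; callers passing a literal hash should splat it. A hedged usage sketch (the bucket URL and Product model are placeholders, not part of the gem):

    Feedx::Producer.perform(
      's3://my-bucket/feeds/products.jsonz',
      last_modified: Time.now
    ) do
      Product.all # any Enumerable or ActiveRecord::Relation
    end
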
@@ -37,7 +37,8 @@ module Feedx
         metadata = @stream.blob.info.metadata
         remote_rev = (metadata[META_LAST_MODIFIED] || metadata[META_LAST_MODIFIED_DC]).to_i
         return -1 unless local_rev > remote_rev
-      rescue BFS::FileNotFound # rubocop:disable Lint/HandleExceptions
+      rescue BFS::FileNotFound
+        nil
       end if local_rev.positive?
 
       @stream.create metadata: { META_LAST_MODIFIED => local_rev.to_s } do |fmt|
@@ -10,7 +10,7 @@ module Feedx
     # @param [Hash] opts options
     # @option opts [Symbol,Class<Feedx::Format::Abstract>] :format custom formatter. Default: from file extension.
     # @option opts [Symbol,Class<Feedx::Compression::Abstract>] :compress enable compression. Default: from file extension.
-    def initialize(url, opts={})
+    def initialize(url, **opts)
       @blob = BFS::Blob.new(url)
       @format = detect_format(opts[:format])
       @compress = detect_compress(opts[:compress])
@@ -20,11 +20,12 @@ module Feedx
     # @param [Hash] opts BFS::Blob#open options
     # @yield A block over a formatted stream.
     # @yieldparam [Feedx::Format::Abstract] formatted input stream.
-    def open(opts={})
-      @blob.open(opts) do |io|
+    def open(**opts)
+      @blob.open(**opts) do |io|
         @compress.reader(io) do |cio|
-          fmt = @format.new(cio)
-          yield fmt
+          @format.decoder(cio) do |fmt|
+            yield fmt
+          end
         end
       end
     end
@@ -33,11 +34,12 @@ module Feedx
     # @param [Hash] opts BFS::Blob#create options
     # @yield A block over a formatted stream.
     # @yieldparam [Feedx::Format::Abstract] formatted output stream.
-    def create(opts={})
-      @blob.create(opts) do |io|
+    def create(**opts)
+      @blob.create(**opts) do |io|
         @compress.writer(io) do |cio|
-          fmt = @format.new(cio)
-          yield fmt
+          @format.encoder(cio) do |fmt|
+            yield fmt
+          end
         end
       end
     end
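
Note: Stream#open and Stream#create now yield a decoder/encoder rather than a format instance bound to the raw IO. A minimal round-trip sketch, assuming a BFS adapter is registered for the placeholder URL and reusing the Feedx::TestCase::Model record class from the gem's specs:

    stream = Feedx::Stream.new('file:///tmp/feed.json')

    stream.create do |fmt|
      fmt.encode(Feedx::TestCase::Model.new('X')) # fmt is an Encoder
    end

    stream.open do |fmt|
      fmt.decode(Feedx::TestCase::Model)          # fmt is a Decoder
    end
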
@@ -48,13 +50,10 @@ module Feedx
       case val
       when nil
         Feedx::Format.detect(@blob.path)
-      when Class
-        parent = Feedx::Format::Abstract
-        raise ArgumentError, "Class #{val} must extend #{parent}" unless val < parent
-
-        val
-      else
+      when String, Symbol
         Feedx::Format.resolve(val)
+      else
+        Feedx::Format.validate!(val)
       end
     end
 
@@ -62,13 +61,10 @@ module Feedx
       case val
       when nil
         Feedx::Compression.detect(@blob.path)
-      when Class
-        parent = Feedx::Compression::Abstract
-        raise ArgumentError, "Class #{val} must extend #{parent}" unless val < parent
-
-        val
-      else
+      when String, Symbol
         Feedx::Compression.resolve(val)
+      else
+        Feedx::Compression.validate!(val)
       end
     end
   end
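
Note: the :format and :compress options no longer accept bare classes; strings and symbols are resolved by name, and anything else is passed through validate!, which (judging by the resolve specs below returning instances) is meant for ready-made instances. A hedged sketch of both spellings, with a placeholder URL:

    # by name, resolved via Feedx::Format.resolve / Feedx::Compression.resolve
    Feedx::Stream.new('file:///tmp/feed.json.gz', format: :json, compress: :gzip)

    # by instance, checked via validate!
    Feedx::Stream.new('file:///tmp/feed.json.gz', format: Feedx::Format::JSON.new)
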
@@ -140,21 +140,14 @@ func (p *Producer) push() (*ProducerPush, error) {
 		wopt.LastMod = modTime
 	}
 
-	// retrieve original last modified time
-	lastMod, err := remoteLastModified(p.ctx, p.remote)
-	if err != nil {
+	// retrieve original last modified time, skip if not modified
+	if rts, err := remoteLastModified(p.ctx, p.remote); err != nil {
 		return nil, err
-	}
-
-	// skip push if not modified
-	if lastMod.Time().Equal(wopt.LastMod) {
+	} else if rts == timestampFromTime(wopt.LastMod) {
 		return &ProducerPush{Producer: p}, nil
 	}
 
-	writer, err := NewWriter(p.ctx, p.remote, &wopt)
-	if err != nil {
-		return nil, err
-	}
+	writer := NewWriter(p.ctx, p.remote, &wopt)
 	defer writer.Discard()
 
 	if err := p.pfn(writer); err != nil {
@@ -2,6 +2,7 @@ package feedx_test
 
 import (
 	"context"
+	"sync/atomic"
 	"time"
 
 	"github.com/bsm/bfs"
@@ -13,14 +14,16 @@ import (
 var _ = Describe("Producer", func() {
 	var subject *feedx.Producer
 	var obj *bfs.Object
+	var numRuns uint32
 	var ctx = context.Background()
 
 	setup := func(o *feedx.ProducerOptions) {
 		var err error
 		subject, err = feedx.NewProducerForRemote(ctx, obj, o, func(w *feedx.Writer) error {
+			atomic.AddUint32(&numRuns, 1)
+
 			for i := 0; i < 10; i++ {
-				fix := fixture
-				if err := w.Encode(&fix); err != nil {
+				if err := w.Encode(seed()); err != nil {
 					return err
 				}
 			}
@@ -30,6 +33,7 @@ var _ = Describe("Producer", func() {
 	}
 
 	BeforeEach(func() {
+		atomic.StoreUint32(&numRuns, 0)
 		obj = bfs.NewInMemObject("path/to/file.jsonz")
 	})
 
@@ -54,16 +58,22 @@ var _ = Describe("Producer", func() {
 
 	It("should produce with custom last-mod check", func() {
 		setup(&feedx.ProducerOptions{
-			LastModCheck: func(_ context.Context) (time.Time, error) { return time.Unix(1515151515, 0), nil },
+			Interval:     50 * time.Millisecond,
+			LastModCheck: func(_ context.Context) (time.Time, error) { return time.Unix(1515151515, 987654321), nil },
 		})
 
-		Expect(subject.LastPush()).To(BeTemporally("~", time.Now(), time.Second))
-		Expect(subject.LastModified()).To(Equal(time.Unix(1515151515, 0)))
+		firstPush := subject.LastPush()
+		Expect(firstPush).To(BeTemporally("~", time.Now(), time.Second))
+		Expect(subject.LastModified()).To(Equal(time.Unix(1515151515, 987000000)))
 		Expect(subject.NumWritten()).To(Equal(10))
+		Expect(atomic.LoadUint32(&numRuns)).To(Equal(uint32(1)))
 
 		info, err := obj.Head(ctx)
 		Expect(err).NotTo(HaveOccurred())
 		Expect(info.Size).To(BeNumerically("~", 75, 10))
-		Expect(info.Metadata).To(HaveKeyWithValue("X-Feedx-Last-Modified", "1515151515000"))
+		Expect(info.Metadata).To(HaveKeyWithValue("X-Feedx-Last-Modified", "1515151515987"))
+
+		Eventually(func() bool { return subject.LastPush().After(firstPush) }).Should(BeTrue())
+		Expect(atomic.LoadUint32(&numRuns)).To(Equal(uint32(1)))
 	})
 })
@@ -5,10 +5,9 @@ import (
 	"io"
 	"io/ioutil"
 
-	"github.com/bsm/feedx"
-
 	"github.com/bsm/bfs"
-	tbp "github.com/golang/protobuf/proto/proto3_proto"
+	"github.com/bsm/feedx"
+	"github.com/bsm/feedx/internal/testdata"
 	. "github.com/onsi/ginkgo"
 	. "github.com/onsi/gomega"
 )
@@ -34,23 +33,23 @@ var _ = Describe("Reader", func() {
 	It("should read", func() {
 		data, err := ioutil.ReadAll(subject)
 		Expect(err).NotTo(HaveOccurred())
-		Expect(len(data)).To(BeNumerically("~", 140, 20))
+		Expect(len(data)).To(BeNumerically("~", 110, 20))
 		Expect(subject.NumRead()).To(Equal(0))
 	})
 
 	It("should decode", func() {
-		var msgs []tbp.Message
+		var msgs []*testdata.MockMessage
 		for {
-			var msg tbp.Message
+			var msg testdata.MockMessage
 			err := subject.Decode(&msg)
 			if err == io.EOF {
 				break
 			}
 			Expect(err).NotTo(HaveOccurred())
-			msgs = append(msgs, msg)
+			msgs = append(msgs, &msg)
 		}
 
-		Expect(msgs).To(Equal([]tbp.Message{fixture, fixture, fixture}))
+		Expect(msgs).To(ConsistOf(seed(), seed(), seed()))
 		Expect(subject.NumRead()).To(Equal(3))
 	})
 })
@@ -3,13 +3,15 @@ require 'spec_helper'
 RSpec.describe Feedx::Compression::Gzip do
   it 'should wrap readers/writers' do
     wio = StringIO.new
-    described_class.writer(wio) {|w| w.write 'xyz' * 1000 }
+    subject.writer(wio) {|w| w.write 'xyz' * 1000 }
     expect(wio.size).to be_within(20).of(40)
+    expect(wio.string.encoding).to eq(Encoding::BINARY)
 
     data = ''
     StringIO.open(wio.string) do |rio|
-      described_class.reader(rio) {|z| data = z.read }
+      subject.reader(rio) {|z| data = z.read }
     end
     expect(data.size).to eq(3000)
+    expect(data.encoding).to eq(Encoding.default_external)
   end
 end
@@ -3,12 +3,12 @@ require 'spec_helper'
 RSpec.describe Feedx::Compression::None do
   it 'should wrap readers/writers' do
     wio = StringIO.new
-    described_class.writer(wio) {|w| w.write 'xyz' * 1000 }
+    subject.writer(wio) {|w| w.write 'xyz' * 1000 }
     expect(wio.size).to eq(3000)
 
     data = ''
     StringIO.open(wio.string) do |rio|
-      described_class.reader(rio) {|z| data = z.read }
+      subject.reader(rio) {|z| data = z.read }
     end
     expect(data.size).to eq(3000)
   end
@@ -2,18 +2,18 @@ require 'spec_helper'
 
 RSpec.describe Feedx::Compression do
   it 'should resolve' do
-    expect(described_class.resolve(:gzip)).to eq(described_class::Gzip)
-    expect(described_class.resolve(:gz)).to eq(described_class::Gzip)
-    expect(described_class.resolve(nil)).to eq(described_class::None)
+    expect(described_class.resolve(:gzip)).to be_instance_of(described_class::Gzip)
+    expect(described_class.resolve(:gz)).to be_instance_of(described_class::Gzip)
+    expect(described_class.resolve(nil)).to be_instance_of(described_class::None)
     expect { described_class.resolve(:txt) }.to raise_error(/invalid compression txt/)
   end
 
   it 'should detect' do
-    expect(described_class.detect('path/to/file.jsonz')).to eq(described_class::Gzip)
-    expect(described_class.detect('path/to/file.json.gz')).to eq(described_class::Gzip)
-    expect(described_class.detect('path/to/file.json')).to eq(described_class::None)
-    expect(described_class.detect('path/to/file.pbz')).to eq(described_class::Gzip)
-    expect(described_class.detect('path/to/file.pb.gz')).to eq(described_class::Gzip)
-    expect(described_class.detect('path/to/file.pb')).to eq(described_class::None)
+    expect(described_class.detect('path/to/file.jsonz')).to be_instance_of(described_class::Gzip)
+    expect(described_class.detect('path/to/file.json.gz')).to be_instance_of(described_class::Gzip)
+    expect(described_class.detect('path/to/file.json')).to be_instance_of(described_class::None)
+    expect(described_class.detect('path/to/file.pbz')).to be_instance_of(described_class::Gzip)
+    expect(described_class.detect('path/to/file.pb.gz')).to be_instance_of(described_class::Gzip)
+    expect(described_class.detect('path/to/file.pb')).to be_instance_of(described_class::None)
   end
 end
@@ -36,10 +36,13 @@ RSpec.describe Feedx::Consumer do
 
   private
 
-  def mock_produce!(opts={})
+  def mock_produce!(enum: mock_enum, **opts)
     url = 'mock:///dir/file.json'
-    opts[:enum] ||= %w[x y z].map {|t| Feedx::TestCase::Model.new(t) } * 100
-    Feedx::Producer.perform url, opts
+    Feedx::Producer.perform url, enum: enum, **opts
     url
   end
+
+  def mock_enum
+    %w[x y z].map {|t| Feedx::TestCase::Model.new(t) } * 100
+  end
 end
@@ -1,17 +1,20 @@
 require 'spec_helper'
 
 RSpec.describe Feedx::Format::Abstract do
-  subject { Feedx::Format::JSON.new(wio) }
+  subject { Feedx::Format::JSON.new }
   let(:wio) { StringIO.new }
+  let(:rio) { StringIO.open(wio.string) }
 
   it 'should decode each' do
-    subject.encode(Feedx::TestCase::Model.new('X'))
-    subject.encode(Feedx::TestCase::Model.new('Y'))
-    subject.encode(Feedx::TestCase::Message.new(title: 'Z'))
-    StringIO.open(wio.string) do |rio|
-      fmt = subject.class.new(rio)
-      dec = fmt.decode_each(Feedx::TestCase::Model).to_a
-      expect(dec.map(&:title)).to eq(%w[X Y Z])
+    subject.encoder wio do |enc|
+      enc.encode(Feedx::TestCase::Model.new('X'))
+      enc.encode(Feedx::TestCase::Model.new('Y'))
+      enc.encode(Feedx::TestCase::Message.new(title: 'Z'))
+    end
+
+    subject.decoder rio do |dec|
+      acc = dec.decode_each(Feedx::TestCase::Model).to_a
+      expect(acc.map(&:title)).to eq(%w[X Y Z])
     end
   end
 end
@@ -1,26 +1,27 @@
 require 'spec_helper'
 
 RSpec.describe Feedx::Format::JSON do
-  subject { described_class.new(wio) }
   let(:wio) { StringIO.new }
+  let(:rio) { StringIO.open(wio.string) }
 
   it 'should encode/decode' do
-    subject.encode(Feedx::TestCase::Model.new('X'))
-    subject.encode(Feedx::TestCase::Model.new('Y'))
-    subject.encode(Feedx::TestCase::Message.new(title: 'Z'))
+    subject.encoder wio do |enc|
+      enc.encode(Feedx::TestCase::Model.new('X'))
+      enc.encode(Feedx::TestCase::Model.new('Y'))
+      enc.encode(Feedx::TestCase::Message.new(title: 'Z'))
+    end
     expect(wio.string.lines).to eq [
       %({"title":"X","updated_at":"2018-01-05 11:25:15 UTC"}\n),
       %({"title":"Y","updated_at":"2018-01-05 11:25:15 UTC"}\n),
       %({"title":"Z"}\n),
     ]
 
-    StringIO.open(wio.string) do |rio|
-      fmt = described_class.new(rio)
-      expect(fmt.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('X'))
-      expect(fmt.decode(Feedx::TestCase::Model.new('O'))).to eq(Feedx::TestCase::Model.new('Y'))
-      expect(fmt.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('Z'))
-      expect(fmt.decode(Feedx::TestCase::Model)).to be_nil
-      expect(fmt).to be_eof
+    subject.decoder rio do |dec|
+      expect(dec.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('X'))
+      expect(dec.decode(Feedx::TestCase::Model.new('O'))).to eq(Feedx::TestCase::Model.new('Y'))
+      expect(dec.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('Z'))
+      expect(dec.decode(Feedx::TestCase::Model)).to be_nil
+      expect(dec).to be_eof
     end
   end
 end
@@ -0,0 +1,30 @@
+require 'spec_helper'
+
+RSpec.describe Feedx::Format::Parquet do
+  let(:wio) { StringIO.new }
+  let(:rio) { StringIO.open(wio.string) }
+
+  let(:schema) do
+    Arrow::Schema.new([
+      Arrow::Field.new('title', :string),
+      Arrow::Field.new('updated_at', type: :timestamp, unit: :second),
+    ])
+  end
+
+  it 'should encode/decode' do
+    subject.encoder wio, schema: schema, batch_size: 2 do |enc|
+      enc.encode(Feedx::TestCase::Model.new('X'))
+      enc.encode(Feedx::TestCase::Model.new('Y'))
+      enc.encode(Feedx::TestCase::Model.new('Z'))
+    end
+    expect(wio.string.bytesize).to be_within(100).of(1100)
+
+    subject.decoder rio do |dec|
+      expect(dec.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('X'))
+      expect(dec.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('Y'))
+      expect(dec.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('Z'))
+      expect(dec.decode(Feedx::TestCase::Model)).to be_nil
+      expect(dec).to be_eof
+    end
+  end
+end
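
Note: the new spec above covers a Parquet format backed by Apache Arrow (it constructs an Arrow::Schema, so the Ruby Arrow bindings — presumably the red-arrow gem — must be available); the encoder takes a :schema and an optional :batch_size. A hedged sketch, with an illustrative path and the spec's own model class:

    schema = Arrow::Schema.new([
      Arrow::Field.new('title', :string),
      Arrow::Field.new('updated_at', type: :timestamp, unit: :second),
    ])

    File.open('/tmp/feed.parquet', 'wb') do |io|
      Feedx::Format::Parquet.new.encoder(io, schema: schema, batch_size: 500) do |enc|
        enc.encode(Feedx::TestCase::Model.new('X'))
      end
    end
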