feedx 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7d1c12376b32188b646923935967905bb97ab8c86dde28236a0341ee878eb631
4
- data.tar.gz: 3c76d4356a0f6aafcad0e8ea37a2159f8170f5d1cea4072753e3c9947afd7613
3
+ metadata.gz: 67b4f1f345ff01e33e63e66daed1810694330cfe9158b60886f29ae6f98d169c
4
+ data.tar.gz: 684f5f44a347d13cc4f71aa31741e1fe5c477205d870a0db96555364b166b235
5
5
  SHA512:
6
- metadata.gz: d55b8ee3620301c3ffc5c95d2895d00f54ee562250c4aa8236ed93fd99e552cd67c126feab4a17b063415518e46b113ef918491b19e37043b5d36fa9e09c2808
7
- data.tar.gz: b8eee064c4e12e462097087431cdd9b435720eead2d7524c9cc7d44cb4a7dde410756f443b71cd5b24261d3b06c95f1d3975ddca6b6ff69692e0e9a77a742446
6
+ metadata.gz: 0a8a9c98a96d79644a1e36d9035cc17f92e3b41c372424fd4a46180e9517da0090c8a123d30d8bfd959a9726ee92b9b71dbfa0a7992fd3171642b1bfd3fa364e
7
+ data.tar.gz: ba6dcde4c14857f3f024a558ba64050d7eca53cafb66599fef5e39e1a9b3ed6e036fb1f94e43217a77d7cba47cd307b21ea5145a25f51131feeace142976a6b8
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- feedx (0.3.2)
4
+ feedx (0.4.0)
5
5
  bfs (>= 0.3.4)
6
6
 
7
7
  GEM
data/README.md CHANGED
@@ -5,15 +5,15 @@
5
5
 
6
6
  Feed-based data exchange between services.
7
7
 
8
- ## Usage
8
+ ## Usage (Ruby)
9
9
 
10
10
  ```ruby
11
11
  require 'bfs/s3'
12
12
  require 'feedx'
13
13
 
14
- # Init a new pusher with an S3 destination
14
+ # Init a new producer with an S3 destination
15
15
  relation = Post.includes(:author)
16
- pusher = Feedx::Pusher.new relation, 's3://my-bucket/feeds/users.json.gz'
16
+ producer = Feedx::Producer.new relation, 's3://my-bucket/feeds/users.json.gz'
17
17
 
18
18
  # Push a new feed every hour
19
19
  loop do
@@ -2,91 +2,84 @@ package feedx
2
2
 
3
3
  import (
4
4
  "context"
5
- "fmt"
6
- "strconv"
7
5
  "sync/atomic"
8
6
  "time"
9
7
 
10
8
  "github.com/bsm/bfs"
11
9
  )
12
10
 
13
- // ConsumerOptions configure the Puller instance.
11
+ // ConsumerOptions configure the consumer instance.
14
12
  type ConsumerOptions struct {
15
- // The interval used by Puller to check the remote changes.
13
+ ReaderOptions
14
+
15
+ // The interval used by consumer to check the remote changes.
16
16
  // Default: 1m
17
17
  Interval time.Duration
18
18
 
19
- // Format specifies the format
20
- // Default: auto-detected from URL path.
21
- Format Format
22
-
23
- // Compression specifies the compression type.
24
- // Default: auto-detected from URL path.
25
- Compression Compression
26
-
27
19
  // AfterSync callbacks are triggered after each sync, receiving
28
20
  // the updated status and error (if occurred).
29
21
  AfterSync func(updated bool, err error)
30
22
  }
31
23
 
32
24
  func (o *ConsumerOptions) norm(name string) error {
25
+ o.ReaderOptions.norm(name)
33
26
  if o.Interval <= 0 {
34
27
  o.Interval = time.Minute
35
28
  }
36
- if o.Format == nil {
37
- o.Format = DetectFormat(name)
38
-
39
- if o.Format == nil {
40
- return fmt.Errorf("feedx: unable to detect format from %q", name)
41
- }
42
- }
43
- if o.Compression == nil {
44
- o.Compression = DetectCompression(name)
45
- }
46
29
  return nil
47
30
  }
48
31
 
49
- // ParseFunc is a data parse function.
50
- type ParseFunc func(FormatDecoder) (data interface{}, size int64, err error)
32
+ // ConsumeFunc is a parsing callback which is run by the consumer every sync interval.
33
+ type ConsumeFunc func(FormatDecoder) (data interface{}, err error)
51
34
 
52
35
  // Consumer manages data retrieval from a remote feed.
53
36
  // It queries the feed in regular intervals, continuously retrieving new updates.
54
37
  type Consumer interface {
55
- // Data returns the data as returned by ParseFunc on last sync.
38
+ // Data returns the data as returned by ConsumeFunc on last sync.
56
39
  Data() interface{}
57
- // LastCheck returns time of last sync attempt.
58
- LastCheck() time.Time
40
+ // LastSync returns time of last sync attempt.
41
+ LastSync() time.Time
59
42
  // LastModified returns time at which the remote feed was last modified.
60
43
  LastModified() time.Time
61
- // Size returns the size as returned by ParseFunc on last sync.
62
- Size() int64
44
+ // NumRead returns the number of values consumed during the last sync.
45
+ NumRead() int
63
46
  // Close stops the underlying sync process.
64
47
  Close() error
65
48
  }
66
49
 
67
50
  // NewConsumer starts a new feed consumer.
68
- func NewConsumer(ctx context.Context, srcURL string, opt *ConsumerOptions, parse ParseFunc) (Consumer, error) {
69
- src, err := bfs.NewObject(ctx, srcURL)
51
+ func NewConsumer(ctx context.Context, remoteURL string, opt *ConsumerOptions, cfn ConsumeFunc) (Consumer, error) {
52
+ remote, err := bfs.NewObject(ctx, remoteURL)
53
+ if err != nil {
54
+ return nil, err
55
+ }
56
+
57
+ csm, err := NewConsumerForRemote(ctx, remote, opt, cfn)
70
58
  if err != nil {
59
+ _ = remote.Close()
71
60
  return nil, err
72
61
  }
62
+ csm.(*consumer).ownRemote = true
63
+ return csm, nil
64
+ }
73
65
 
66
+ // NewConsumerForRemote starts a new feed consumer with a remote.
67
+ func NewConsumerForRemote(ctx context.Context, remote *bfs.Object, opt *ConsumerOptions, cfn ConsumeFunc) (Consumer, error) {
74
68
  var o ConsumerOptions
75
69
  if opt != nil {
76
70
  o = *opt
77
71
  }
78
- if err := o.norm(src.Name()); err != nil {
79
- _ = src.Close()
72
+ if err := o.norm(remote.Name()); err != nil {
80
73
  return nil, err
81
74
  }
82
75
 
83
76
  ctx, stop := context.WithCancel(ctx)
84
77
  f := &consumer{
85
- src: src,
86
- opt: o,
87
- ctx: ctx,
88
- stop: stop,
89
- parse: parse,
78
+ remote: remote,
79
+ opt: o,
80
+ ctx: ctx,
81
+ stop: stop,
82
+ cfn: cfn,
90
83
  }
91
84
 
92
85
  // run initial sync
@@ -102,91 +95,81 @@ func NewConsumer(ctx context.Context, srcURL string, opt *ConsumerOptions, parse
102
95
  }
103
96
 
104
97
  type consumer struct {
105
- src *bfs.Object
98
+ remote *bfs.Object
99
+ ownRemote bool
100
+
106
101
  opt ConsumerOptions
107
102
  ctx context.Context
108
103
  stop context.CancelFunc
109
104
 
110
- parse ParseFunc
105
+ cfn ConsumeFunc
106
+ data atomic.Value
111
107
 
112
- size, lastModMs int64
113
- data, lastCheck atomic.Value
108
+ numRead, lastMod, lastSync int64
114
109
  }
115
110
 
116
- // Data implements Feed interface.
111
+ // Data implements Consumer interface.
117
112
  func (f *consumer) Data() interface{} {
118
113
  return f.data.Load()
119
114
  }
120
115
 
121
- // Size implements Feed interface.
122
- func (f *consumer) Size() int64 {
123
- return atomic.LoadInt64(&f.size)
116
+ // NumRead implements Consumer interface.
117
+ func (f *consumer) NumRead() int {
118
+ return int(atomic.LoadInt64(&f.numRead))
124
119
  }
125
120
 
126
- // LastCheck implements Feed interface.
127
- func (f *consumer) LastCheck() time.Time {
128
- return f.lastCheck.Load().(time.Time)
121
+ // LastSync implements Consumer interface.
122
+ func (f *consumer) LastSync() time.Time {
123
+ return timestamp(atomic.LoadInt64(&f.lastSync)).Time()
129
124
  }
130
125
 
131
- // LastModified implements Feed interface.
126
+ // LastModified implements Consumer interface.
132
127
  func (f *consumer) LastModified() time.Time {
133
- msec := atomic.LoadInt64(&f.lastModMs)
134
- return time.Unix(msec/1000, msec%1000*1e6)
128
+ return timestamp(atomic.LoadInt64(&f.lastMod)).Time()
135
129
  }
136
130
 
137
- // Close implements Feed interface.
131
+ // Close implements Consumer interface.
138
132
  func (f *consumer) Close() error {
139
133
  f.stop()
140
- return f.src.Close()
134
+ if f.ownRemote {
135
+ return f.remote.Close()
136
+ }
137
+ return nil
141
138
  }
142
139
 
143
140
  func (f *consumer) sync(force bool) (bool, error) {
144
- f.lastCheck.Store(time.Now())
141
+ defer func() {
142
+ atomic.StoreInt64(&f.lastSync, timestampFromTime(time.Now()).Millis())
143
+ }()
145
144
 
146
- info, err := f.src.Head(f.ctx)
145
+ // retrieve original last modified time
146
+ lastMod, err := remoteLastModified(f.ctx, f.remote)
147
147
  if err != nil {
148
148
  return false, err
149
149
  }
150
150
 
151
- // calculate last modified time
152
- msec, _ := strconv.ParseInt(info.Metadata[lastModifiedMetaKey], 10, 64)
153
-
154
151
  // skip update if not forced or modified
155
- if msec == atomic.LoadInt64(&f.lastModMs) && !force {
152
+ if lastMod.Millis() == atomic.LoadInt64(&f.lastMod) && !force {
156
153
  return false, nil
157
154
  }
158
155
 
159
- // open remote for reading
160
- r, err := f.src.Open(f.ctx)
161
- if err != nil {
162
- return false, err
163
- }
164
- defer r.Close()
165
-
166
- // wrap in compressed reader
167
- c, err := f.opt.Compression.NewReader(r)
168
- if err != nil {
169
- return false, err
170
- }
171
- defer c.Close()
172
-
173
- // open decoder
174
- d, err := f.opt.Format.NewDecoder(c)
156
+ // open remote reader
157
+ reader, err := NewReader(f.ctx, f.remote, &f.opt.ReaderOptions)
175
158
  if err != nil {
176
159
  return false, err
177
160
  }
178
- defer f.Close()
161
+ defer reader.Close()
179
162
 
180
- // parse feed
181
- data, size, err := f.parse(d)
163
+ // consume feed
164
+ data, err := f.cfn(reader)
182
165
  if err != nil {
183
166
  return false, err
184
167
  }
185
168
 
186
169
  // update stores
187
170
  f.data.Store(data)
188
- atomic.StoreInt64(&f.size, size)
189
- atomic.StoreInt64(&f.lastModMs, msec)
171
+ atomic.StoreInt64(&f.numRead, int64(reader.NumRead()))
172
+ atomic.StoreInt64(&f.lastMod, lastMod.Millis())
190
173
  return true, nil
191
174
  }
192
175
 
@@ -200,7 +183,9 @@ func (f *consumer) loop() {
200
183
  return
201
184
  case <-ticker.C:
202
185
  updated, err := f.sync(false)
203
- f.opt.AfterSync(updated, err)
186
+ if f.opt.AfterSync != nil {
187
+ f.opt.AfterSync(updated, err)
188
+ }
204
189
  }
205
190
  }
206
191
  }
@@ -13,58 +13,41 @@ import (
13
13
  )
14
14
 
15
15
  var _ = Describe("Consumer", func() {
16
- ctx := context.Background()
17
- msg := &tbp.Message{
18
- Name: "Joe",
19
- TrueScotsman: true,
20
- Hilarity: tbp.Message_BILL_BAILEY,
21
- }
22
- pfn := func(dec feedx.FormatDecoder) (interface{}, int64, error) {
23
- var msgs []*tbp.Message
24
- for {
25
- msg := new(tbp.Message)
26
- if err := dec.Decode(msg); err == io.EOF {
27
- break
28
- } else if err != nil {
29
- return nil, 0, err
30
- }
31
- msgs = append(msgs, msg)
32
- }
33
- return msgs, int64(len(msgs)), nil
34
- }
16
+ var subject feedx.Consumer
17
+ var obj *bfs.Object
18
+ var ctx = context.Background()
35
19
 
36
20
  BeforeEach(func() {
37
- memStore = bfs.NewInMem()
38
- w, err := memStore.Create(ctx, "path/to/file.jsonz", &bfs.WriteOptions{
39
- Metadata: map[string]string{"x-feedx-pusher-last-modified": "1544477788899"},
21
+ obj = bfs.NewInMemObject("path/to/file.jsonz")
22
+ Expect(writeMulti(obj, 2)).To(Succeed())
23
+
24
+ var err error
25
+ subject, err = feedx.NewConsumerForRemote(ctx, obj, nil, func(dec feedx.FormatDecoder) (interface{}, error) {
26
+ var msgs []tbp.Message
27
+ for {
28
+ var msg tbp.Message
29
+ if err := dec.Decode(&msg); err == io.EOF {
30
+ break
31
+ }
32
+ if err != nil {
33
+ return nil, err
34
+ }
35
+ msgs = append(msgs, msg)
36
+ }
37
+ return msgs, nil
40
38
  })
41
39
  Expect(err).NotTo(HaveOccurred())
42
- defer w.Close()
43
-
44
- c, err := feedx.GZipCompression.NewWriter(w)
45
- Expect(err).NotTo(HaveOccurred())
46
- defer c.Close()
47
-
48
- f, err := feedx.JSONFormat.NewEncoder(c)
49
- Expect(err).NotTo(HaveOccurred())
50
- defer f.Close()
40
+ })
51
41
 
52
- Expect(f.Encode(msg)).To(Succeed())
53
- Expect(f.Encode(msg)).To(Succeed())
54
- Expect(f.Close()).To(Succeed())
55
- Expect(c.Close()).To(Succeed())
56
- Expect(w.Close()).To(Succeed())
42
+ AfterEach(func() {
43
+ Expect(subject.Close()).To(Succeed())
57
44
  })
58
45
 
59
46
  It("should sync and retrieve feeds from remote", func() {
60
- subject, err := feedx.NewConsumer(ctx, "mem:///path/to/file.jsonz", nil, pfn)
61
- Expect(err).NotTo(HaveOccurred())
62
- defer subject.Close()
63
-
64
- Expect(subject.LastCheck()).To(BeTemporally("~", time.Now(), time.Second))
65
- Expect(subject.LastModified()).To(BeTemporally("~", time.Unix(1544477788, 0), time.Second))
66
- Expect(subject.Size()).To(Equal(int64(2)))
67
- Expect(subject.Data()).To(Equal([]*tbp.Message{msg, msg}))
47
+ Expect(subject.LastSync()).To(BeTemporally("~", time.Now(), time.Second))
48
+ Expect(subject.LastModified()).To(BeTemporally("~", time.Unix(1515151515, 0), time.Second))
49
+ Expect(subject.NumRead()).To(Equal(2))
50
+ Expect(subject.Data()).To(Equal([]tbp.Message{fixture, fixture}))
68
51
  Expect(subject.Close()).To(Succeed())
69
52
  })
70
53
  })
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'feedx'
3
- s.version = '0.3.2'
3
+ s.version = '0.4.0'
4
4
  s.authors = ['Black Square Media Ltd']
5
5
  s.email = ['info@blacksquaremedia.com']
6
6
  s.summary = %(Exchange data between components via feeds)
data/feedx.go CHANGED
@@ -1,3 +1,57 @@
1
1
  package feedx
2
2
 
3
- const lastModifiedMetaKey = "x-feedx-pusher-last-modified"
3
+ import (
4
+ "context"
5
+ "errors"
6
+ "strconv"
7
+ "time"
8
+
9
+ "github.com/bsm/bfs"
10
+ )
11
+
12
+ // ErrNotModified is used to signal that something has not been modified.
13
+ var ErrNotModified = errors.New("feedx: not modified")
14
+
15
+ const (
16
+ metaLastModified = "x-feedx-last-modified"
17
+ metaPusherLastModified = "x-feedx-pusher-last-modified"
18
+ )
19
+
20
+ // Timestamp with millisecond resolution
21
+ type timestamp int64
22
+
23
+ func timestampFromTime(t time.Time) timestamp {
24
+ if n := t.Unix()*1000 + int64(t.Nanosecond()/1e6); n > 0 {
25
+ return timestamp(n)
26
+ }
27
+ return 0
28
+ }
29
+
30
+ func remoteLastModified(ctx context.Context, obj *bfs.Object) (timestamp, error) {
31
+ info, err := obj.Head(ctx)
32
+ if err == bfs.ErrNotFound {
33
+ return 0, nil
34
+ } else if err != nil {
35
+ return 0, err
36
+ }
37
+
38
+ millis, _ := strconv.ParseInt(info.Metadata[metaLastModified], 10, 64)
39
+ if millis == 0 {
40
+ millis, _ = strconv.ParseInt(info.Metadata[metaPusherLastModified], 10, 64)
41
+ }
42
+ return timestamp(millis), nil
43
+ }
44
+
45
+ // Millis returns the number of milliseconds since epoch.
46
+ func (t timestamp) Millis() int64 { return int64(t) }
47
+
48
+ // Time returns the time at t.
49
+ func (t timestamp) Time() time.Time {
50
+ n := t.Millis()
51
+ return time.Unix(n/1000, n%1000*1e6)
52
+ }
53
+
54
+ // String returns a string of milliseconds.
55
+ func (t timestamp) String() string {
56
+ return strconv.FormatInt(int64(t), 10)
57
+ }
@@ -4,8 +4,11 @@ import (
4
4
  "context"
5
5
  "net/url"
6
6
  "testing"
7
+ "time"
7
8
 
8
9
  "github.com/bsm/bfs"
10
+ "github.com/bsm/feedx"
11
+ tbp "github.com/golang/protobuf/proto/proto3_proto"
9
12
  . "github.com/onsi/ginkgo"
10
13
  . "github.com/onsi/gomega"
11
14
  )
@@ -21,6 +24,30 @@ func init() {
21
24
  })
22
25
  }
23
26
 
27
+ var fixture = tbp.Message{
28
+ Name: "Joe",
29
+ Hilarity: tbp.Message_BILL_BAILEY,
30
+ HeightInCm: 180,
31
+ }
32
+
33
+ func writeMulti(obj *bfs.Object, numEntries int) error {
34
+ w, err := feedx.NewWriter(context.Background(), obj, &feedx.WriterOptions{
35
+ LastMod: time.Unix(1515151515, 123456789),
36
+ })
37
+ if err != nil {
38
+ return err
39
+ }
40
+ defer w.Close()
41
+
42
+ for i := 0; i < numEntries; i++ {
43
+ fix := fixture
44
+ if err := w.Encode(&fix); err != nil {
45
+ return err
46
+ }
47
+ }
48
+ return w.Close()
49
+ }
50
+
24
51
  func TestSuite(t *testing.T) {
25
52
  RegisterFailHandler(Fail)
26
53
  RunSpecs(t, "feedx")
@@ -11,12 +11,6 @@ import (
11
11
  )
12
12
 
13
13
  var _ = Describe("Format", func() {
14
- msg := &tbp.Message{
15
- Name: "Joe",
16
- TrueScotsman: true,
17
- Hilarity: tbp.Message_BILL_BAILEY,
18
- }
19
-
20
14
  runSharedTest := func(subject feedx.Format) {
21
15
  buf := new(bytes.Buffer)
22
16
 
@@ -24,8 +18,9 @@ var _ = Describe("Format", func() {
24
18
  Expect(err).NotTo(HaveOccurred())
25
19
  defer enc.Close()
26
20
 
27
- Expect(enc.Encode(msg)).To(Succeed())
28
- Expect(enc.Encode(msg)).To(Succeed())
21
+ fix := fixture
22
+ Expect(enc.Encode(&fix)).To(Succeed())
23
+ Expect(enc.Encode(&fix)).To(Succeed())
29
24
  Expect(enc.Close()).To(Succeed())
30
25
 
31
26
  dec, err := subject.NewDecoder(buf)
@@ -1,6 +1,9 @@
1
1
  module Feedx
2
+ META_LAST_MODIFIED = 'x-feedx-last-modified'.freeze
3
+
2
4
  autoload :Compression, 'feedx/compression'
3
5
  autoload :Format, 'feedx/format'
6
+ autoload :Producer, 'feedx/producer'
4
7
  autoload :Pusher, 'feedx/pusher'
5
8
  autoload :TaskState, 'feedx/task_state'
6
9
  end
@@ -0,0 +1,84 @@
1
+ require 'uri'
2
+ require 'bfs'
3
+ require 'feedx'
4
+
5
+ module Feedx
6
+ # Produces a relation as an encoded stream to a remote location.
7
+ class Producer
8
+ # See constructor.
9
+ def self.perform(url, opts={}, &block)
10
+ new(url, opts, &block).perform
11
+ end
12
+
13
+ # @param [String] url the destination URL.
14
+ # @param [Hash] opts options
15
+ # @option opts [Enumerable,ActiveRecord::Relation] :enum relation or enumerator to stream.
16
+ # @option opts [Symbol,Class<Feedx::Format::Abstract>] :format custom formatter. Default: from file extension.
17
+ # @option opts [Symbol,Class<Feedx::Compression::Abstract>] :compress enable compression. Default: from file extension.
18
+ # @option opts [Time,Proc] :last_modified the last modified time, used to determine if a push is necessary.
19
+ # @yield A block factory to generate the relation or enumerator.
20
+ # @yieldreturn [Enumerable,ActiveRecord::Relation] the relation or enumerator to stream.
21
+ def initialize(url, opts={}, &block)
22
+ @enum = opts[:enum] || block
23
+ raise ArgumentError, "#{self.class.name}.new expects an :enum option or a block factory" unless @enum
24
+
25
+ @blob = BFS::Blob.new(url)
26
+ @format = detect_format(opts[:format])
27
+ @compress = detect_compress(opts[:compress])
28
+ @last_mod = opts[:last_modified]
29
+ end
30
+
31
+ def perform
32
+ enum = @enum.is_a?(Proc) ? @enum.call : @enum
33
+ last_mod = @last_mod.is_a?(Proc) ? @last_mod.call(enum) : @last_mod
34
+ current = (last_mod.to_f * 1000).floor
35
+
36
+ begin
37
+ previous = @blob.info.metadata[META_LAST_MODIFIED].to_i
38
+ return -1 unless current > previous
39
+ rescue BFS::FileNotFound # rubocop:disable Lint/HandleExceptions
40
+ end if current.positive?
41
+
42
+ @blob.create metadata: { META_LAST_MODIFIED => current.to_s } do |io|
43
+ @compress.wrap(io) {|w| write_all(enum, w) }
44
+ end
45
+ @blob.info.size
46
+ end
47
+
48
+ private
49
+
50
+ def detect_format(val)
51
+ case val
52
+ when nil
53
+ Feedx::Format.detect(@blob.path)
54
+ when Class
55
+ parent = Feedx::Format::Abstract
56
+ raise ArgumentError, "Class #{val} must extend #{parent}" unless val < parent
57
+
58
+ val
59
+ else
60
+ Feedx::Format.resolve(val)
61
+ end
62
+ end
63
+
64
+ def detect_compress(val)
65
+ case val
66
+ when nil
67
+ Feedx::Compression.detect(@blob.path)
68
+ when Class
69
+ parent = Feedx::Compression::Abstract
70
+ raise ArgumentError, "Class #{val} must extend #{parent}" unless val < parent
71
+
72
+ val
73
+ else
74
+ Feedx::Compression.resolve(val)
75
+ end
76
+ end
77
+
78
+ def write_all(enum, io)
79
+ stream = @format.new(io)
80
+ iterator = enum.respond_to?(:find_each) ? :find_each : :each
81
+ enum.send(iterator) {|rec| stream.write(rec) }
82
+ end
83
+ end
84
+ end
@@ -1,85 +1,8 @@
1
- require 'uri'
2
- require 'bfs'
1
+ require 'feedx'
3
2
 
4
3
  module Feedx
5
- # Pushes a relation as a protobuf encoded stream to an S3 location.
6
- class Pusher
7
- META_LAST_MODIFIED = 'x-feedx-pusher-last-modified'.freeze
8
-
9
- # See constructor.
10
- def self.perform(url, opts={}, &block)
11
- new(url, opts, &block).perform
12
- end
13
-
14
- # @param [String] url the destination URL.
15
- # @param [Hash] opts options
16
- # @option opts [Enumerable,ActiveRecord::Relation] :enum relation or enumerator to stream.
17
- # @option opts [Symbol,Class<Feedx::Format::Abstract>] :format custom formatter. Default: from file extension.
18
- # @option opts [Symbol,Class<Feedx::Compression::Abstract>] :compress enable compression. Default: from file extension.
19
- # @option opts [Time,Proc] :last_modified the last modified time, used to determine if a push is necessary.
20
- # @yield A block factory to generate the relation or enumerator.
21
- # @yieldreturn [Enumerable,ActiveRecord::Relation] the relation or enumerator to stream.
22
- def initialize(url, opts={}, &block)
23
- @enum = opts[:enum] || block
24
- raise ArgumentError, "#{self.class.name}.new expects an :enum option or a block factory" unless @enum
25
-
26
- @blob = BFS::Blob.new(url)
27
- @format = detect_format(opts[:format])
28
- @compress = detect_compress(opts[:compress])
29
- @last_mod = opts[:last_modified]
30
- end
31
-
32
- def perform
33
- enum = @enum.is_a?(Proc) ? @enum.call : @enum
34
- last_mod = @last_mod.is_a?(Proc) ? @last_mod.call(enum) : @last_mod
35
- current = (last_mod.to_f * 1000).floor
36
-
37
- begin
38
- previous = @blob.info.metadata[META_LAST_MODIFIED].to_i
39
- return -1 unless current > previous
40
- rescue BFS::FileNotFound # rubocop:disable Lint/HandleExceptions
41
- end if current.positive?
42
-
43
- @blob.create metadata: { META_LAST_MODIFIED => current.to_s } do |io|
44
- @compress.wrap(io) {|w| write_all(enum, w) }
45
- end
46
- @blob.info.size
47
- end
48
-
49
- private
50
-
51
- def detect_format(val)
52
- case val
53
- when nil
54
- Feedx::Format.detect(@blob.path)
55
- when Class
56
- parent = Feedx::Format::Abstract
57
- raise ArgumentError, "Class #{val} must extend #{parent}" unless val < parent
58
-
59
- val
60
- else
61
- Feedx::Format.resolve(val)
62
- end
63
- end
64
-
65
- def detect_compress(val)
66
- case val
67
- when nil
68
- Feedx::Compression.detect(@blob.path)
69
- when Class
70
- parent = Feedx::Compression::Abstract
71
- raise ArgumentError, "Class #{val} must extend #{parent}" unless val < parent
72
-
73
- val
74
- else
75
- Feedx::Compression.resolve(val)
76
- end
77
- end
78
-
79
- def write_all(enum, io)
80
- stream = @format.new(io)
81
- iterator = enum.respond_to?(:find_each) ? :find_each : :each
82
- enum.send(iterator) {|rec| stream.write(rec) }
83
- end
4
+ unless defined?(Gem::Deprecate) && Gem::Deprecate.skip
5
+ warn "WARNING: Feedx::Pusher is deprecated; use Feedx::Producer instead (called from #{caller(2..2).first})."
84
6
  end
7
+ Pusher = Producer
85
8
  end
@@ -0,0 +1,130 @@
1
+ package feedx
2
+
3
+ import (
4
+ "context"
5
+ "fmt"
6
+ "io"
7
+ "time"
8
+
9
+ "github.com/bsm/bfs"
10
+ )
11
+
12
+ // ReaderOptions configure the reader instance.
13
+ type ReaderOptions struct {
14
+ // Format specifies the format
15
+ // Default: auto-detected from URL path.
16
+ Format Format
17
+
18
+ // Compression specifies the compression type.
19
+ // Default: auto-detected from URL path.
20
+ Compression Compression
21
+ }
22
+
23
+ func (o *ReaderOptions) norm(name string) error {
24
+ if o.Format == nil {
25
+ o.Format = DetectFormat(name)
26
+
27
+ if o.Format == nil {
28
+ return fmt.Errorf("feedx: unable to detect format from %q", name)
29
+ }
30
+ }
31
+ if o.Compression == nil {
32
+ o.Compression = DetectCompression(name)
33
+ }
34
+ return nil
35
+ }
36
+
37
+ // Reader reads data from a remote feed.
38
+ type Reader struct {
39
+ remote *bfs.Object
40
+ opt ReaderOptions
41
+ ctx context.Context
42
+ num int
43
+
44
+ br io.ReadCloser // bfs reader
45
+ cr io.ReadCloser // compression reader
46
+ fd FormatDecoder
47
+ }
48
+
49
+ // NewReader inits a new reader.
50
+ func NewReader(ctx context.Context, remote *bfs.Object, opt *ReaderOptions) (*Reader, error) {
51
+ var o ReaderOptions
52
+ if opt != nil {
53
+ o = *opt
54
+ }
55
+ if err := o.norm(remote.Name()); err != nil {
56
+ return nil, err
57
+ }
58
+
59
+ return &Reader{
60
+ remote: remote,
61
+ opt: o,
62
+ ctx: ctx,
63
+ }, nil
64
+ }
65
+
66
+ // Decode decodes the next value from the feed.
67
+ func (r *Reader) Decode(v interface{}) error {
68
+ if r.br == nil {
69
+ br, err := r.remote.Open(r.ctx)
70
+ if err != nil {
71
+ return err
72
+ }
73
+ r.br = br
74
+ }
75
+
76
+ if r.cr == nil {
77
+ cr, err := r.opt.Compression.NewReader(r.br)
78
+ if err != nil {
79
+ return err
80
+ }
81
+ r.cr = cr
82
+ }
83
+
84
+ if r.fd == nil {
85
+ fd, err := r.opt.Format.NewDecoder(r.cr)
86
+ if err != nil {
87
+ return err
88
+ }
89
+ r.fd = fd
90
+ }
91
+
92
+ if err := r.fd.Decode(v); err != nil {
93
+ return err
94
+ }
95
+
96
+ r.num++
97
+ return nil
98
+ }
99
+
100
+ // NumRead returns the number of read values.
101
+ func (r *Reader) NumRead() int {
102
+ return r.num
103
+ }
104
+
105
+ // LastModified returns the last modified time of the remote feed.
106
+ func (r *Reader) LastModified() (time.Time, error) {
107
+ lastMod, err := remoteLastModified(r.ctx, r.remote)
108
+ return lastMod.Time(), err
109
+ }
110
+
111
+ // Close closes the reader.
112
+ func (r *Reader) Close() error {
113
+ var err error
114
+ if r.fd != nil {
115
+ if e := r.fd.Close(); e != nil {
116
+ err = e
117
+ }
118
+ }
119
+ if r.cr != nil {
120
+ if e := r.cr.Close(); e != nil {
121
+ err = e
122
+ }
123
+ }
124
+ if r.br != nil {
125
+ if e := r.br.Close(); e != nil {
126
+ err = e
127
+ }
128
+ }
129
+ return err
130
+ }
@@ -0,0 +1,48 @@
1
+ package feedx_test
2
+
3
+ import (
4
+ "context"
5
+ "io"
6
+
7
+ "github.com/bsm/feedx"
8
+
9
+ "github.com/bsm/bfs"
10
+ tbp "github.com/golang/protobuf/proto/proto3_proto"
11
+ . "github.com/onsi/ginkgo"
12
+ . "github.com/onsi/gomega"
13
+ )
14
+
15
+ var _ = Describe("Reader", func() {
16
+ var subject *feedx.Reader
17
+ var obj *bfs.Object
18
+ var ctx = context.Background()
19
+
20
+ BeforeEach(func() {
21
+ obj = bfs.NewInMemObject("path/to/file.json")
22
+ Expect(writeMulti(obj, 3)).To(Succeed())
23
+
24
+ var err error
25
+ subject, err = feedx.NewReader(ctx, obj, nil)
26
+ Expect(err).NotTo(HaveOccurred())
27
+ })
28
+
29
+ AfterEach(func() {
30
+ Expect(subject.Close()).To(Succeed())
31
+ })
32
+
33
+ It("should read", func() {
34
+ var msgs []tbp.Message
35
+ for {
36
+ var msg tbp.Message
37
+ err := subject.Decode(&msg)
38
+ if err == io.EOF {
39
+ break
40
+ }
41
+ Expect(err).NotTo(HaveOccurred())
42
+ msgs = append(msgs, msg)
43
+ }
44
+
45
+ Expect(msgs).To(Equal([]tbp.Message{fixture, fixture, fixture}))
46
+ Expect(subject.NumRead()).To(Equal(3))
47
+ })
48
+ })
@@ -1,6 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
- RSpec.describe Feedx::Pusher do
3
+ RSpec.describe Feedx::Producer do
4
4
  let :model do
5
5
  Class.new Struct.new(:title) do
6
6
  def to_pb
@@ -60,7 +60,7 @@ RSpec.describe Feedx::Pusher do
60
60
 
61
61
  it 'should support last-modified' do
62
62
  described_class.perform 'mock:///dir/file.json', last_modified: Time.at(1515151515), enum: enumerable
63
- expect(bucket.info('dir/file.json').metadata).to eq('x-feedx-pusher-last-modified' => '1515151515000')
63
+ expect(bucket.info('dir/file.json').metadata).to eq('x-feedx-last-modified' => '1515151515000')
64
64
  end
65
65
 
66
66
  it 'should perform conditionally' do
@@ -0,0 +1,135 @@
1
+ package feedx
2
+
3
+ import (
4
+ "context"
5
+ "fmt"
6
+ "io"
7
+ "time"
8
+
9
+ "github.com/bsm/bfs"
10
+ )
11
+
12
+ // WriterOptions configure the writer instance.
13
+ type WriterOptions struct {
14
+ // Format specifies the format
15
+ // Default: auto-detected from URL path.
16
+ Format Format
17
+
18
+ // Compression specifies the compression type.
19
+ // Default: auto-detected from URL path.
20
+ Compression Compression
21
+
22
+ // Provides an optional last modified timestamp which is stored with the remote metadata.
23
+ // Default: time.Now().
24
+ LastMod time.Time
25
+ }
26
+
27
+ func (o *WriterOptions) norm(name string) error {
28
+ if o.Format == nil {
29
+ o.Format = DetectFormat(name)
30
+
31
+ if o.Format == nil {
32
+ return fmt.Errorf("feedx: unable to detect format from %q", name)
33
+ }
34
+ }
35
+
36
+ if o.Compression == nil {
37
+ o.Compression = DetectCompression(name)
38
+ }
39
+
40
+ if o.LastMod.IsZero() {
41
+ o.LastMod = time.Now()
42
+ }
43
+
44
+ return nil
45
+ }
46
+
47
+ // Writer encodes feeds to remote locations.
48
+ type Writer struct {
49
+ ctx context.Context
50
+ remote *bfs.Object
51
+ opt WriterOptions
52
+ num int
53
+
54
+ bw io.WriteCloser // bfs writer
55
+ cw io.WriteCloser // compression writer
56
+ fe FormatEncoder
57
+ }
58
+
59
+ // NewWriter inits a new feed writer.
60
+ func NewWriter(ctx context.Context, remote *bfs.Object, opt *WriterOptions) (*Writer, error) {
61
+ var o WriterOptions
62
+ if opt != nil {
63
+ o = *opt
64
+ }
65
+ o.norm(remote.Name())
66
+
67
+ return &Writer{
68
+ ctx: ctx,
69
+ remote: remote,
70
+ opt: o,
71
+ }, nil
72
+ }
73
+
74
+ // Encode appends a value to the feed.
75
+ func (w *Writer) Encode(v interface{}) error {
76
+ if w.bw == nil {
77
+ ts := timestampFromTime(w.opt.LastMod)
78
+ bw, err := w.remote.Create(w.ctx, &bfs.WriteOptions{
79
+ Metadata: map[string]string{metaLastModified: ts.String()},
80
+ })
81
+ if err != nil {
82
+ return err
83
+ }
84
+ w.bw = bw
85
+ }
86
+
87
+ if w.cw == nil {
88
+ cw, err := w.opt.Compression.NewWriter(w.bw)
89
+ if err != nil {
90
+ return err
91
+ }
92
+ w.cw = cw
93
+ }
94
+
95
+ if w.fe == nil {
96
+ fe, err := w.opt.Format.NewEncoder(w.cw)
97
+ if err != nil {
98
+ return err
99
+ }
100
+ w.fe = fe
101
+ }
102
+
103
+ if err := w.fe.Encode(v); err != nil {
104
+ return err
105
+ }
106
+
107
+ w.num++
108
+ return nil
109
+ }
110
+
111
+ // NumWritten returns the number of written values.
112
+ func (w *Writer) NumWritten() int {
113
+ return w.num
114
+ }
115
+
116
+ // Close closes the writer.
117
+ func (w *Writer) Close() error {
118
+ var err error
119
+ if w.fe != nil {
120
+ if e := w.fe.Close(); e != nil {
121
+ err = e
122
+ }
123
+ }
124
+ if w.cw != nil {
125
+ if e := w.cw.Close(); e != nil {
126
+ err = e
127
+ }
128
+ }
129
+ if w.bw != nil {
130
+ if e := w.bw.Close(); e != nil {
131
+ err = e
132
+ }
133
+ }
134
+ return err
135
+ }
@@ -0,0 +1,34 @@
1
+ package feedx_test
2
+
3
+ import (
4
+ "context"
5
+
6
+ "github.com/bsm/bfs"
7
+ . "github.com/onsi/ginkgo"
8
+ . "github.com/onsi/gomega"
9
+ )
10
+
11
+ var _ = Describe("Writer", func() {
12
+ var plain, compressed *bfs.Object
13
+ var ctx = context.Background()
14
+
15
+ BeforeEach(func() {
16
+ plain = bfs.NewInMemObject("path/to/file.json")
17
+ compressed = bfs.NewInMemObject("path/to/file.jsonz")
18
+ })
19
+
20
+ It("should encode", func() {
21
+ Expect(writeMulti(plain, 10)).To(Succeed())
22
+ Expect(writeMulti(compressed, 10)).To(Succeed())
23
+
24
+ info, err := plain.Head(ctx)
25
+ Expect(err).NotTo(HaveOccurred())
26
+ Expect(info.Size).To(BeNumerically("~", 470, 10))
27
+ Expect(info.Metadata).To(Equal(map[string]string{"x-feedx-last-modified": "1515151515123"}))
28
+
29
+ info, err = compressed.Head(ctx)
30
+ Expect(err).NotTo(HaveOccurred())
31
+ Expect(info.Size).To(BeNumerically("~", 76, 10))
32
+ Expect(info.Metadata).To(Equal(map[string]string{"x-feedx-last-modified": "1515151515123"}))
33
+ })
34
+ })
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feedx
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Black Square Media Ltd
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-12-14 00:00:00.000000000 Z
11
+ date: 2018-12-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bfs
@@ -131,15 +131,20 @@ files:
131
131
  - lib/feedx/format/abstract.rb
132
132
  - lib/feedx/format/json.rb
133
133
  - lib/feedx/format/protobuf.rb
134
+ - lib/feedx/producer.rb
134
135
  - lib/feedx/pusher.rb
136
+ - reader.go
137
+ - reader_test.go
135
138
  - spec/feedx/compression/gzip_spec.rb
136
139
  - spec/feedx/compression/none_spec.rb
137
140
  - spec/feedx/compression_spec.rb
138
141
  - spec/feedx/format/json_spec.rb
139
142
  - spec/feedx/format/protobuf_spec.rb
140
143
  - spec/feedx/format_spec.rb
141
- - spec/feedx/pusher_spec.rb
144
+ - spec/feedx/producer_spec.rb
142
145
  - spec/spec_helper.rb
146
+ - writer.go
147
+ - writer_test.go
143
148
  homepage: https://github.com/bsm/feedx
144
149
  licenses:
145
150
  - Apache-2.0
@@ -171,5 +176,5 @@ test_files:
171
176
  - spec/feedx/format/json_spec.rb
172
177
  - spec/feedx/format/protobuf_spec.rb
173
178
  - spec/feedx/format_spec.rb
174
- - spec/feedx/pusher_spec.rb
179
+ - spec/feedx/producer_spec.rb
175
180
  - spec/spec_helper.rb