feedx 0.3.2 → 0.4.0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: 7d1c12376b32188b646923935967905bb97ab8c86dde28236a0341ee878eb631
-   data.tar.gz: 3c76d4356a0f6aafcad0e8ea37a2159f8170f5d1cea4072753e3c9947afd7613
+   metadata.gz: 67b4f1f345ff01e33e63e66daed1810694330cfe9158b60886f29ae6f98d169c
+   data.tar.gz: 684f5f44a347d13cc4f71aa31741e1fe5c477205d870a0db96555364b166b235
  SHA512:
-   metadata.gz: d55b8ee3620301c3ffc5c95d2895d00f54ee562250c4aa8236ed93fd99e552cd67c126feab4a17b063415518e46b113ef918491b19e37043b5d36fa9e09c2808
-   data.tar.gz: b8eee064c4e12e462097087431cdd9b435720eead2d7524c9cc7d44cb4a7dde410756f443b71cd5b24261d3b06c95f1d3975ddca6b6ff69692e0e9a77a742446
+   metadata.gz: 0a8a9c98a96d79644a1e36d9035cc17f92e3b41c372424fd4a46180e9517da0090c8a123d30d8bfd959a9726ee92b9b71dbfa0a7992fd3171642b1bfd3fa364e
+   data.tar.gz: ba6dcde4c14857f3f024a558ba64050d7eca53cafb66599fef5e39e1a9b3ed6e036fb1f94e43217a77d7cba47cd307b21ea5145a25f51131feeace142976a6b8
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
  PATH
    remote: .
    specs:
-     feedx (0.3.2)
+     feedx (0.4.0)
        bfs (>= 0.3.4)
 
  GEM
data/README.md CHANGED
@@ -5,15 +5,15 @@
 
  Feed-based data exchange between services.
 
- ## Usage
+ ## Usage (Ruby)
 
  ```ruby
  require 'bfs/s3'
  require 'feedx'
 
- # Init a new pusher with an S3 destination
+ # Init a new producer with an S3 destination
  relation = Post.includes(:author)
- pusher = Feedx::Pusher.new relation, 's3://my-bucket/feeds/users.json.gz'
+ producer = Feedx::Producer.new relation, 's3://my-bucket/feeds/users.json.gz'
 
  # Push a new feed every hour
  loop do
data/consumer.go CHANGED
@@ -2,91 +2,84 @@ package feedx
 
  import (
    "context"
-   "fmt"
-   "strconv"
    "sync/atomic"
    "time"
 
    "github.com/bsm/bfs"
  )
 
- // ConsumerOptions configure the Puller instance.
+ // ConsumerOptions configure the consumer instance.
  type ConsumerOptions struct {
-   // The interval used by Puller to check the remote changes.
+   ReaderOptions
+
+   // The interval used by consumer to check the remote changes.
    // Default: 1m
    Interval time.Duration
 
-   // Format specifies the format
-   // Default: auto-detected from URL path.
-   Format Format
-
-   // Compression specifies the compression type.
-   // Default: auto-detected from URL path.
-   Compression Compression
-
    // AfterSync callbacks are triggered after each sync, receiving
    // the updated status and error (if occurred).
    AfterSync func(updated bool, err error)
  }
 
  func (o *ConsumerOptions) norm(name string) error {
+   o.ReaderOptions.norm(name)
    if o.Interval <= 0 {
      o.Interval = time.Minute
    }
-   if o.Format == nil {
-     o.Format = DetectFormat(name)
-
-     if o.Format == nil {
-       return fmt.Errorf("feedx: unable to detect format from %q", name)
-     }
-   }
-   if o.Compression == nil {
-     o.Compression = DetectCompression(name)
-   }
    return nil
  }
 
- // ParseFunc is a data parse function.
- type ParseFunc func(FormatDecoder) (data interface{}, size int64, err error)
+ // ConsumeFunc is a parsing callback which is run by the consumer every sync interval.
+ type ConsumeFunc func(FormatDecoder) (data interface{}, err error)
 
  // Consumer manages data retrieval from a remote feed.
  // It queries the feed in regular intervals, continuously retrieving new updates.
  type Consumer interface {
-   // Data returns the data as returned by ParseFunc on last sync.
+   // Data returns the data as returned by ConsumeFunc on last sync.
    Data() interface{}
-   // LastCheck returns time of last sync attempt.
-   LastCheck() time.Time
+   // LastSync returns time of last sync attempt.
+   LastSync() time.Time
    // LastModified returns time at which the remote feed was last modified.
    LastModified() time.Time
-   // Size returns the size as returned by ParseFunc on last sync.
-   Size() int64
+   // NumRead returns the number of values consumed during the last sync.
+   NumRead() int
    // Close stops the underlying sync process.
    Close() error
  }
 
  // NewConsumer starts a new feed consumer.
- func NewConsumer(ctx context.Context, srcURL string, opt *ConsumerOptions, parse ParseFunc) (Consumer, error) {
-   src, err := bfs.NewObject(ctx, srcURL)
+ func NewConsumer(ctx context.Context, remoteURL string, opt *ConsumerOptions, cfn ConsumeFunc) (Consumer, error) {
+   remote, err := bfs.NewObject(ctx, remoteURL)
+   if err != nil {
+     return nil, err
+   }
+
+   csm, err := NewConsumerForRemote(ctx, remote, opt, cfn)
    if err != nil {
+     _ = remote.Close()
      return nil, err
    }
+   csm.(*consumer).ownRemote = true
+   return csm, nil
+ }
 
+ // NewConsumerForRemote starts a new feed consumer with a remote.
+ func NewConsumerForRemote(ctx context.Context, remote *bfs.Object, opt *ConsumerOptions, cfn ConsumeFunc) (Consumer, error) {
    var o ConsumerOptions
    if opt != nil {
      o = *opt
    }
-   if err := o.norm(src.Name()); err != nil {
-     _ = src.Close()
+   if err := o.norm(remote.Name()); err != nil {
      return nil, err
    }
 
    ctx, stop := context.WithCancel(ctx)
    f := &consumer{
-     src:   src,
-     opt:   o,
-     ctx:   ctx,
-     stop:  stop,
-     parse: parse,
+     remote: remote,
+     opt:    o,
+     ctx:    ctx,
+     stop:   stop,
+     cfn:    cfn,
    }
 
    // run initial sync
@@ -102,91 +95,81 @@ func NewConsumer(ctx context.Context, srcURL string, opt *ConsumerOptions, parse
  }
 
  type consumer struct {
-   src *bfs.Object
+   remote    *bfs.Object
+   ownRemote bool
+
    opt  ConsumerOptions
    ctx  context.Context
    stop context.CancelFunc
 
-   parse ParseFunc
+   cfn  ConsumeFunc
+   data atomic.Value
 
-   size, lastModMs int64
-   data, lastCheck atomic.Value
+   numRead, lastMod, lastSync int64
  }
 
- // Data implements Feed interface.
+ // Data implements Consumer interface.
  func (f *consumer) Data() interface{} {
    return f.data.Load()
  }
 
- // Size implements Feed interface.
- func (f *consumer) Size() int64 {
-   return atomic.LoadInt64(&f.size)
+ // NumRead implements Consumer interface.
+ func (f *consumer) NumRead() int {
+   return int(atomic.LoadInt64(&f.numRead))
  }
 
- // LastCheck implements Feed interface.
- func (f *consumer) LastCheck() time.Time {
-   return f.lastCheck.Load().(time.Time)
+ // LastSync implements Consumer interface.
+ func (f *consumer) LastSync() time.Time {
+   return timestamp(atomic.LoadInt64(&f.lastSync)).Time()
  }
 
- // LastModified implements Feed interface.
+ // LastModified implements Consumer interface.
  func (f *consumer) LastModified() time.Time {
-   msec := atomic.LoadInt64(&f.lastModMs)
-   return time.Unix(msec/1000, msec%1000*1e6)
+   return timestamp(atomic.LoadInt64(&f.lastMod)).Time()
  }
 
- // Close implements Feed interface.
+ // Close implements Consumer interface.
  func (f *consumer) Close() error {
    f.stop()
-   return f.src.Close()
+   if f.ownRemote {
+     return f.remote.Close()
+   }
+   return nil
  }
 
  func (f *consumer) sync(force bool) (bool, error) {
-   f.lastCheck.Store(time.Now())
+   defer func() {
+     atomic.StoreInt64(&f.lastSync, timestampFromTime(time.Now()).Millis())
+   }()
 
-   info, err := f.src.Head(f.ctx)
+   // retrieve original last modified time
+   lastMod, err := remoteLastModified(f.ctx, f.remote)
    if err != nil {
      return false, err
    }
 
-   // calculate last modified time
-   msec, _ := strconv.ParseInt(info.Metadata[lastModifiedMetaKey], 10, 64)
-
    // skip update if not forced or modified
-   if msec == atomic.LoadInt64(&f.lastModMs) && !force {
+   if lastMod.Millis() == atomic.LoadInt64(&f.lastMod) && !force {
      return false, nil
    }
 
-   // open remote for reading
-   r, err := f.src.Open(f.ctx)
-   if err != nil {
-     return false, err
-   }
-   defer r.Close()
-
-   // wrap in compressed reader
-   c, err := f.opt.Compression.NewReader(r)
-   if err != nil {
-     return false, err
-   }
-   defer c.Close()
-
-   // open decoder
-   d, err := f.opt.Format.NewDecoder(c)
+   // open remote reader
+   reader, err := NewReader(f.ctx, f.remote, &f.opt.ReaderOptions)
    if err != nil {
      return false, err
    }
-   defer f.Close()
+   defer reader.Close()
 
-   // parse feed
-   data, size, err := f.parse(d)
+   // consume feed
+   data, err := f.cfn(reader)
    if err != nil {
      return false, err
    }
 
    // update stores
    f.data.Store(data)
-   atomic.StoreInt64(&f.size, size)
-   atomic.StoreInt64(&f.lastModMs, msec)
+   atomic.StoreInt64(&f.numRead, int64(reader.NumRead()))
+   atomic.StoreInt64(&f.lastMod, lastMod.Millis())
    return true, nil
  }
 
@@ -200,7 +183,9 @@ func (f *consumer) loop() {
        return
      case <-ticker.C:
        updated, err := f.sync(false)
-       f.opt.AfterSync(updated, err)
+       if f.opt.AfterSync != nil {
+         f.opt.AfterSync(updated, err)
+       }
      }
    }
  }
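For orientation, here is a minimal sketch of the reworked consumer API. The feed URL and record type are hypothetical, and the `s3://` scheme assumes the matching bfs driver has been registered; note the callback now returns only the data, since the reader tracks the record count itself:

```go
package main

import (
	"context"
	"io"
	"log"

	"github.com/bsm/feedx"
)

func main() {
	ctx := context.Background()

	// The ConsumeFunc decodes the entire feed and returns it; it runs once
	// on startup and then on every sync interval (default: 1m).
	csm, err := feedx.NewConsumer(ctx, "s3://my-bucket/feeds/users.json.gz", nil, func(dec feedx.FormatDecoder) (interface{}, error) {
		var recs []map[string]interface{}
		for {
			var rec map[string]interface{}
			if err := dec.Decode(&rec); err == io.EOF {
				break
			} else if err != nil {
				return nil, err
			}
			recs = append(recs, rec)
		}
		return recs, nil
	})
	if err != nil {
		log.Fatal(err)
	}
	defer csm.Close()

	log.Printf("consumed %d records, remote last modified at %s", csm.NumRead(), csm.LastModified())
}
```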
data/consumer_test.go CHANGED
@@ -13,58 +13,41 @@ import (
  )
 
  var _ = Describe("Consumer", func() {
-   ctx := context.Background()
-   msg := &tbp.Message{
-     Name:         "Joe",
-     TrueScotsman: true,
-     Hilarity:     tbp.Message_BILL_BAILEY,
-   }
-   pfn := func(dec feedx.FormatDecoder) (interface{}, int64, error) {
-     var msgs []*tbp.Message
-     for {
-       msg := new(tbp.Message)
-       if err := dec.Decode(msg); err == io.EOF {
-         break
-       } else if err != nil {
-         return nil, 0, err
-       }
-       msgs = append(msgs, msg)
-     }
-     return msgs, int64(len(msgs)), nil
-   }
+   var subject feedx.Consumer
+   var obj *bfs.Object
+   var ctx = context.Background()
 
    BeforeEach(func() {
-     memStore = bfs.NewInMem()
-     w, err := memStore.Create(ctx, "path/to/file.jsonz", &bfs.WriteOptions{
-       Metadata: map[string]string{"x-feedx-pusher-last-modified": "1544477788899"},
+     obj = bfs.NewInMemObject("path/to/file.jsonz")
+     Expect(writeMulti(obj, 2)).To(Succeed())
+
+     var err error
+     subject, err = feedx.NewConsumerForRemote(ctx, obj, nil, func(dec feedx.FormatDecoder) (interface{}, error) {
+       var msgs []tbp.Message
+       for {
+         var msg tbp.Message
+         if err := dec.Decode(&msg); err == io.EOF {
+           break
+         }
+         if err != nil {
+           return nil, err
+         }
+         msgs = append(msgs, msg)
+       }
+       return msgs, nil
      })
      Expect(err).NotTo(HaveOccurred())
-     defer w.Close()
-
-     c, err := feedx.GZipCompression.NewWriter(w)
-     Expect(err).NotTo(HaveOccurred())
-     defer c.Close()
-
-     f, err := feedx.JSONFormat.NewEncoder(c)
-     Expect(err).NotTo(HaveOccurred())
-     defer f.Close()
+   })
 
-     Expect(f.Encode(msg)).To(Succeed())
-     Expect(f.Encode(msg)).To(Succeed())
-     Expect(f.Close()).To(Succeed())
-     Expect(c.Close()).To(Succeed())
-     Expect(w.Close()).To(Succeed())
+   AfterEach(func() {
+     Expect(subject.Close()).To(Succeed())
    })
 
    It("should sync and retrieve feeds from remote", func() {
-     subject, err := feedx.NewConsumer(ctx, "mem:///path/to/file.jsonz", nil, pfn)
-     Expect(err).NotTo(HaveOccurred())
-     defer subject.Close()
-
-     Expect(subject.LastCheck()).To(BeTemporally("~", time.Now(), time.Second))
-     Expect(subject.LastModified()).To(BeTemporally("~", time.Unix(1544477788, 0), time.Second))
-     Expect(subject.Size()).To(Equal(int64(2)))
-     Expect(subject.Data()).To(Equal([]*tbp.Message{msg, msg}))
+     Expect(subject.LastSync()).To(BeTemporally("~", time.Now(), time.Second))
+     Expect(subject.LastModified()).To(BeTemporally("~", time.Unix(1515151515, 0), time.Second))
+     Expect(subject.NumRead()).To(Equal(2))
+     Expect(subject.Data()).To(Equal([]tbp.Message{fixture, fixture}))
      Expect(subject.Close()).To(Succeed())
    })
  })
data/feedx.gemspec CHANGED
@@ -1,6 +1,6 @@
  Gem::Specification.new do |s|
    s.name = 'feedx'
-   s.version = '0.3.2'
+   s.version = '0.4.0'
    s.authors = ['Black Square Media Ltd']
    s.email = ['info@blacksquaremedia.com']
    s.summary = %(Exchange data between components via feeds)
data/feedx.go CHANGED
@@ -1,3 +1,57 @@
  package feedx
 
- const lastModifiedMetaKey = "x-feedx-pusher-last-modified"
+ import (
+   "context"
+   "errors"
+   "strconv"
+   "time"
+
+   "github.com/bsm/bfs"
+ )
+
+ // ErrNotModified is used to signal that something has not been modified.
+ var ErrNotModified = errors.New("feedx: not modified")
+
+ const (
+   metaLastModified       = "x-feedx-last-modified"
+   metaPusherLastModified = "x-feedx-pusher-last-modified"
+ )
+
+ // Timestamp with millisecond resolution
+ type timestamp int64
+
+ func timestampFromTime(t time.Time) timestamp {
+   if n := t.Unix()*1000 + int64(t.Nanosecond()/1e6); n > 0 {
+     return timestamp(n)
+   }
+   return 0
+ }
+
+ func remoteLastModified(ctx context.Context, obj *bfs.Object) (timestamp, error) {
+   info, err := obj.Head(ctx)
+   if err == bfs.ErrNotFound {
+     return 0, nil
+   } else if err != nil {
+     return 0, err
+   }
+
+   millis, _ := strconv.ParseInt(info.Metadata[metaLastModified], 10, 64)
+   if millis == 0 {
+     millis, _ = strconv.ParseInt(info.Metadata[metaPusherLastModified], 10, 64)
+   }
+   return timestamp(millis), nil
+ }
+
+ // Millis returns the number of milliseconds since epoch.
+ func (t timestamp) Millis() int64 { return int64(t) }
+
+ // Time returns the time at t.
+ func (t timestamp) Time() time.Time {
+   n := t.Millis()
+   return time.Unix(n/1000, n%1000*1e6)
+ }
+
+ // String returns a string of milliseconds.
+ func (t timestamp) String() string {
+   return strconv.FormatInt(int64(t), 10)
+ }
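The unexported `timestamp` type above stores epoch milliseconds. A standalone sketch of the same round-trip arithmetic (mirroring `timestampFromTime` and `timestamp.Time`, which are internal to the package):

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	// Truncate to millisecond resolution, as timestampFromTime does.
	t := time.Unix(1515151515, 123456789)
	millis := t.Unix()*1000 + int64(t.Nanosecond()/1e6) // 1515151515123

	// Rebuild a time.Time from milliseconds, as timestamp.Time does.
	back := time.Unix(millis/1000, millis%1000*1e6)
	fmt.Println(millis, t.Sub(back)) // the 456789ns sub-millisecond remainder is lost
}
```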
data/feedx_test.go CHANGED
@@ -4,8 +4,11 @@ import (
    "context"
    "net/url"
    "testing"
+   "time"
 
    "github.com/bsm/bfs"
+   "github.com/bsm/feedx"
+   tbp "github.com/golang/protobuf/proto/proto3_proto"
    . "github.com/onsi/ginkgo"
    . "github.com/onsi/gomega"
  )
@@ -21,6 +24,30 @@ func init() {
    })
  }
 
+ var fixture = tbp.Message{
+   Name:       "Joe",
+   Hilarity:   tbp.Message_BILL_BAILEY,
+   HeightInCm: 180,
+ }
+
+ func writeMulti(obj *bfs.Object, numEntries int) error {
+   w, err := feedx.NewWriter(context.Background(), obj, &feedx.WriterOptions{
+     LastMod: time.Unix(1515151515, 123456789),
+   })
+   if err != nil {
+     return err
+   }
+   defer w.Close()
+
+   for i := 0; i < numEntries; i++ {
+     fix := fixture
+     if err := w.Encode(&fix); err != nil {
+       return err
+     }
+   }
+   return w.Close()
+ }
+
  func TestSuite(t *testing.T) {
    RegisterFailHandler(Fail)
    RunSpecs(t, "feedx")
data/format_test.go CHANGED
@@ -11,12 +11,6 @@ import (
  )
 
  var _ = Describe("Format", func() {
-   msg := &tbp.Message{
-     Name:         "Joe",
-     TrueScotsman: true,
-     Hilarity:     tbp.Message_BILL_BAILEY,
-   }
-
    runSharedTest := func(subject feedx.Format) {
      buf := new(bytes.Buffer)
 
@@ -24,8 +18,9 @@ var _ = Describe("Format", func() {
      Expect(err).NotTo(HaveOccurred())
      defer enc.Close()
 
-     Expect(enc.Encode(msg)).To(Succeed())
-     Expect(enc.Encode(msg)).To(Succeed())
+     fix := fixture
+     Expect(enc.Encode(&fix)).To(Succeed())
+     Expect(enc.Encode(&fix)).To(Succeed())
      Expect(enc.Close()).To(Succeed())
 
      dec, err := subject.NewDecoder(buf)
data/lib/feedx.rb CHANGED
@@ -1,6 +1,9 @@
  module Feedx
+   META_LAST_MODIFIED = 'x-feedx-last-modified'.freeze
+
    autoload :Compression, 'feedx/compression'
    autoload :Format, 'feedx/format'
+   autoload :Producer, 'feedx/producer'
    autoload :Pusher, 'feedx/pusher'
    autoload :TaskState, 'feedx/task_state'
  end
data/lib/feedx/producer.rb ADDED
@@ -0,0 +1,84 @@
+ require 'uri'
+ require 'bfs'
+ require 'feedx'
+
+ module Feedx
+   # Produces a relation as an encoded stream to a remote location.
+   class Producer
+     # See constructor.
+     def self.perform(url, opts={}, &block)
+       new(url, opts, &block).perform
+     end
+
+     # @param [String] url the destination URL.
+     # @param [Hash] opts options
+     # @option opts [Enumerable,ActiveRecord::Relation] :enum relation or enumerator to stream.
+     # @option opts [Symbol,Class<Feedx::Format::Abstract>] :format custom formatter. Default: from file extension.
+     # @option opts [Symbol,Class<Feedx::Compression::Abstract>] :compress enable compression. Default: from file extension.
+     # @option opts [Time,Proc] :last_modified the last modified time, used to determine if a push is necessary.
+     # @yield A block factory to generate the relation or enumerator.
+     # @yieldreturn [Enumerable,ActiveRecord::Relation] the relation or enumerator to stream.
+     def initialize(url, opts={}, &block)
+       @enum = opts[:enum] || block
+       raise ArgumentError, "#{self.class.name}.new expects an :enum option or a block factory" unless @enum
+
+       @blob = BFS::Blob.new(url)
+       @format = detect_format(opts[:format])
+       @compress = detect_compress(opts[:compress])
+       @last_mod = opts[:last_modified]
+     end
+
+     def perform
+       enum = @enum.is_a?(Proc) ? @enum.call : @enum
+       last_mod = @last_mod.is_a?(Proc) ? @last_mod.call(enum) : @last_mod
+       current = (last_mod.to_f * 1000).floor
+
+       begin
+         previous = @blob.info.metadata[META_LAST_MODIFIED].to_i
+         return -1 unless current > previous
+       rescue BFS::FileNotFound # rubocop:disable Lint/HandleExceptions
+       end if current.positive?
+
+       @blob.create metadata: { META_LAST_MODIFIED => current.to_s } do |io|
+         @compress.wrap(io) {|w| write_all(enum, w) }
+       end
+       @blob.info.size
+     end
+
+     private
+
+     def detect_format(val)
+       case val
+       when nil
+         Feedx::Format.detect(@blob.path)
+       when Class
+         parent = Feedx::Format::Abstract
+         raise ArgumentError, "Class #{val} must extend #{parent}" unless val < parent
+
+         val
+       else
+         Feedx::Format.resolve(val)
+       end
+     end
+
+     def detect_compress(val)
+       case val
+       when nil
+         Feedx::Compression.detect(@blob.path)
+       when Class
+         parent = Feedx::Compression::Abstract
+         raise ArgumentError, "Class #{val} must extend #{parent}" unless val < parent
+
+         val
+       else
+         Feedx::Compression.resolve(val)
+       end
+     end
+
+     def write_all(enum, io)
+       stream = @format.new(io)
+       iterator = enum.respond_to?(:find_each) ? :find_each : :each
+       enum.send(iterator) {|rec| stream.write(rec) }
+     end
+   end
+ end
data/lib/feedx/pusher.rb CHANGED
@@ -1,85 +1,8 @@
- require 'uri'
- require 'bfs'
+ require 'feedx'
 
  module Feedx
-   # Pushes a relation as a protobuf encoded stream to an S3 location.
-   class Pusher
-     META_LAST_MODIFIED = 'x-feedx-pusher-last-modified'.freeze
-
-     # See constructor.
-     def self.perform(url, opts={}, &block)
-       new(url, opts, &block).perform
-     end
-
-     # @param [String] url the destination URL.
-     # @param [Hash] opts options
-     # @option opts [Enumerable,ActiveRecord::Relation] :enum relation or enumerator to stream.
-     # @option opts [Symbol,Class<Feedx::Format::Abstract>] :format custom formatter. Default: from file extension.
-     # @option opts [Symbol,Class<Feedx::Compression::Abstract>] :compress enable compression. Default: from file extension.
-     # @option opts [Time,Proc] :last_modified the last modified time, used to determine if a push is necessary.
-     # @yield A block factory to generate the relation or enumerator.
-     # @yieldreturn [Enumerable,ActiveRecord::Relation] the relation or enumerator to stream.
-     def initialize(url, opts={}, &block)
-       @enum = opts[:enum] || block
-       raise ArgumentError, "#{self.class.name}.new expects an :enum option or a block factory" unless @enum
-
-       @blob = BFS::Blob.new(url)
-       @format = detect_format(opts[:format])
-       @compress = detect_compress(opts[:compress])
-       @last_mod = opts[:last_modified]
-     end
-
-     def perform
-       enum = @enum.is_a?(Proc) ? @enum.call : @enum
-       last_mod = @last_mod.is_a?(Proc) ? @last_mod.call(enum) : @last_mod
-       current = (last_mod.to_f * 1000).floor
-
-       begin
-         previous = @blob.info.metadata[META_LAST_MODIFIED].to_i
-         return -1 unless current > previous
-       rescue BFS::FileNotFound # rubocop:disable Lint/HandleExceptions
-       end if current.positive?
-
-       @blob.create metadata: { META_LAST_MODIFIED => current.to_s } do |io|
-         @compress.wrap(io) {|w| write_all(enum, w) }
-       end
-       @blob.info.size
-     end
-
-     private
-
-     def detect_format(val)
-       case val
-       when nil
-         Feedx::Format.detect(@blob.path)
-       when Class
-         parent = Feedx::Format::Abstract
-         raise ArgumentError, "Class #{val} must extend #{parent}" unless val < parent
-
-         val
-       else
-         Feedx::Format.resolve(val)
-       end
-     end
-
-     def detect_compress(val)
-       case val
-       when nil
-         Feedx::Compression.detect(@blob.path)
-       when Class
-         parent = Feedx::Compression::Abstract
-         raise ArgumentError, "Class #{val} must extend #{parent}" unless val < parent
-
-         val
-       else
-         Feedx::Compression.resolve(val)
-       end
-     end
-
-     def write_all(enum, io)
-       stream = @format.new(io)
-       iterator = enum.respond_to?(:find_each) ? :find_each : :each
-       enum.send(iterator) {|rec| stream.write(rec) }
-     end
+   unless defined?(Gem::Deprecate) && Gem::Deprecate.skip
+     warn "WARNING: Feedx::Pusher is deprecated; use Feedx::Producer instead (called from #{caller(2..2).first})."
    end
+   Pusher = Producer
  end
data/reader.go ADDED
@@ -0,0 +1,130 @@
+ package feedx
+
+ import (
+   "context"
+   "fmt"
+   "io"
+   "time"
+
+   "github.com/bsm/bfs"
+ )
+
+ // ReaderOptions configure the reader instance.
+ type ReaderOptions struct {
+   // Format specifies the format
+   // Default: auto-detected from URL path.
+   Format Format
+
+   // Compression specifies the compression type.
+   // Default: auto-detected from URL path.
+   Compression Compression
+ }
+
+ func (o *ReaderOptions) norm(name string) error {
+   if o.Format == nil {
+     o.Format = DetectFormat(name)
+
+     if o.Format == nil {
+       return fmt.Errorf("feedx: unable to detect format from %q", name)
+     }
+   }
+   if o.Compression == nil {
+     o.Compression = DetectCompression(name)
+   }
+   return nil
+ }
+
+ // Reader reads data from a remote feed.
+ type Reader struct {
+   remote *bfs.Object
+   opt    ReaderOptions
+   ctx    context.Context
+   num    int
+
+   br io.ReadCloser // bfs reader
+   cr io.ReadCloser // compression reader
+   fd FormatDecoder
+ }
+
+ // NewReader inits a new reader.
+ func NewReader(ctx context.Context, remote *bfs.Object, opt *ReaderOptions) (*Reader, error) {
+   var o ReaderOptions
+   if opt != nil {
+     o = *opt
+   }
+   if err := o.norm(remote.Name()); err != nil {
+     return nil, err
+   }
+
+   return &Reader{
+     remote: remote,
+     opt:    o,
+     ctx:    ctx,
+   }, nil
+ }
+
+ // Decode decodes the next value from the feed.
+ func (r *Reader) Decode(v interface{}) error {
+   if r.br == nil {
+     br, err := r.remote.Open(r.ctx)
+     if err != nil {
+       return err
+     }
+     r.br = br
+   }
+
+   if r.cr == nil {
+     cr, err := r.opt.Compression.NewReader(r.br)
+     if err != nil {
+       return err
+     }
+     r.cr = cr
+   }
+
+   if r.fd == nil {
+     fd, err := r.opt.Format.NewDecoder(r.cr)
+     if err != nil {
+       return err
+     }
+     r.fd = fd
+   }
+
+   if err := r.fd.Decode(v); err != nil {
+     return err
+   }
+
+   r.num++
+   return nil
+ }
+
+ // NumRead returns the number of read values.
+ func (r *Reader) NumRead() int {
+   return r.num
+ }
+
+ // LastModified returns the last modified time of the remote feed.
+ func (r *Reader) LastModified() (time.Time, error) {
+   lastMod, err := remoteLastModified(r.ctx, r.remote)
+   return lastMod.Time(), err
+ }
+
+ // Close closes the reader.
+ func (r *Reader) Close() error {
+   var err error
+   if r.fd != nil {
+     if e := r.fd.Close(); e != nil {
+       err = e
+     }
+   }
+   if r.cr != nil {
+     if e := r.cr.Close(); e != nil {
+       err = e
+     }
+   }
+   if r.br != nil {
+     if e := r.br.Close(); e != nil {
+       err = e
+     }
+   }
+   return err
+ }
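A hedged usage sketch for the reader: when the remote path carries no recognizable extension, `DetectFormat` fails and the options must be set explicitly. `JSONFormat` and `GZipCompression` are the exported values used elsewhere in this diff; the path and record shape are hypothetical, and the object is assumed to have been written by a matching `Writer`:

```go
package main

import (
	"context"
	"fmt"
	"io"
	"log"

	"github.com/bsm/bfs"
	"github.com/bsm/feedx"
)

func main() {
	ctx := context.Background()
	obj := bfs.NewInMemObject("path/to/feed.bin") // extension yields no format

	r, err := feedx.NewReader(ctx, obj, &feedx.ReaderOptions{
		Format:      feedx.JSONFormat,      // detection would fail for ".bin"
		Compression: feedx.GZipCompression, // gzip despite the suffix
	})
	if err != nil {
		log.Fatal(err)
	}
	defer r.Close()

	for {
		var rec map[string]interface{}
		if err := r.Decode(&rec); err == io.EOF {
			break
		} else if err != nil {
			log.Fatal(err)
		}
	}
	fmt.Println("records read:", r.NumRead())
}
```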
data/reader_test.go ADDED
@@ -0,0 +1,48 @@
+ package feedx_test
+
+ import (
+   "context"
+   "io"
+
+   "github.com/bsm/feedx"
+
+   "github.com/bsm/bfs"
+   tbp "github.com/golang/protobuf/proto/proto3_proto"
+   . "github.com/onsi/ginkgo"
+   . "github.com/onsi/gomega"
+ )
+
+ var _ = Describe("Reader", func() {
+   var subject *feedx.Reader
+   var obj *bfs.Object
+   var ctx = context.Background()
+
+   BeforeEach(func() {
+     obj = bfs.NewInMemObject("path/to/file.json")
+     Expect(writeMulti(obj, 3)).To(Succeed())
+
+     var err error
+     subject, err = feedx.NewReader(ctx, obj, nil)
+     Expect(err).NotTo(HaveOccurred())
+   })
+
+   AfterEach(func() {
+     Expect(subject.Close()).To(Succeed())
+   })
+
+   It("should read", func() {
+     var msgs []tbp.Message
+     for {
+       var msg tbp.Message
+       err := subject.Decode(&msg)
+       if err == io.EOF {
+         break
+       }
+       Expect(err).NotTo(HaveOccurred())
+       msgs = append(msgs, msg)
+     }
+
+     Expect(msgs).To(Equal([]tbp.Message{fixture, fixture, fixture}))
+     Expect(subject.NumRead()).To(Equal(3))
+   })
+ })
data/spec/feedx/producer_spec.rb CHANGED
@@ -1,6 +1,6 @@
  require 'spec_helper'
 
- RSpec.describe Feedx::Pusher do
+ RSpec.describe Feedx::Producer do
    let :model do
      Class.new Struct.new(:title) do
        def to_pb
@@ -60,7 +60,7 @@ RSpec.describe Feedx::Pusher do
 
    it 'should support last-modified' do
      described_class.perform 'mock:///dir/file.json', last_modified: Time.at(1515151515), enum: enumerable
-     expect(bucket.info('dir/file.json').metadata).to eq('x-feedx-pusher-last-modified' => '1515151515000')
+     expect(bucket.info('dir/file.json').metadata).to eq('x-feedx-last-modified' => '1515151515000')
    end
 
    it 'should perform conditionally' do
data/writer.go ADDED
@@ -0,0 +1,135 @@
+ package feedx
+
+ import (
+   "context"
+   "fmt"
+   "io"
+   "time"
+
+   "github.com/bsm/bfs"
+ )
+
+ // WriterOptions configure the producer instance.
+ type WriterOptions struct {
+   // Format specifies the format
+   // Default: auto-detected from URL path.
+   Format Format
+
+   // Compression specifies the compression type.
+   // Default: auto-detected from URL path.
+   Compression Compression
+
+   // Provides an optional last modified timestamp which is stored with the remote metadata.
+   // Default: time.Now().
+   LastMod time.Time
+ }
+
+ func (o *WriterOptions) norm(name string) error {
+   if o.Format == nil {
+     o.Format = DetectFormat(name)
+
+     if o.Format == nil {
+       return fmt.Errorf("feedx: unable to detect format from %q", name)
+     }
+   }
+
+   if o.Compression == nil {
+     o.Compression = DetectCompression(name)
+   }
+
+   if o.LastMod.IsZero() {
+     o.LastMod = time.Now()
+   }
+
+   return nil
+ }
+
+ // Writer encodes feeds to remote locations.
+ type Writer struct {
+   ctx    context.Context
+   remote *bfs.Object
+   opt    WriterOptions
+   num    int
+
+   bw io.WriteCloser // bfs writer
+   cw io.WriteCloser // compression writer
+   fe FormatEncoder
+ }
+
+ // NewWriter inits a new feed writer.
+ func NewWriter(ctx context.Context, remote *bfs.Object, opt *WriterOptions) (*Writer, error) {
+   var o WriterOptions
+   if opt != nil {
+     o = *opt
+   }
+   o.norm(remote.Name())
+
+   return &Writer{
+     ctx:    ctx,
+     remote: remote,
+     opt:    o,
+   }, nil
+ }
+
+ // Encode appends a value to the feed.
+ func (w *Writer) Encode(v interface{}) error {
+   if w.bw == nil {
+     ts := timestampFromTime(w.opt.LastMod)
+     bw, err := w.remote.Create(w.ctx, &bfs.WriteOptions{
+       Metadata: map[string]string{metaLastModified: ts.String()},
+     })
+     if err != nil {
+       return err
+     }
+     w.bw = bw
+   }
+
+   if w.cw == nil {
+     cw, err := w.opt.Compression.NewWriter(w.bw)
+     if err != nil {
+       return err
+     }
+     w.cw = cw
+   }
+
+   if w.fe == nil {
+     fe, err := w.opt.Format.NewEncoder(w.cw)
+     if err != nil {
+       return err
+     }
+     w.fe = fe
+   }
+
+   if err := w.fe.Encode(v); err != nil {
+     return err
+   }
+
+   w.num++
+   return nil
+ }
+
+ // NumWritten returns the number of written values.
+ func (w *Writer) NumWritten() int {
+   return w.num
+ }
+
+ // Close closes the writer.
+ func (w *Writer) Close() error {
+   var err error
+   if w.fe != nil {
+     if e := w.fe.Close(); e != nil {
+       err = e
+     }
+   }
+   if w.cw != nil {
+     if e := w.cw.Close(); e != nil {
+       err = e
+     }
+   }
+   if w.bw != nil {
+     if e := w.bw.Close(); e != nil {
+       err = e
+     }
+   }
+   return err
+ }
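And a minimal end-to-end sketch of the new writer (an in-memory object for brevity; the record shape is hypothetical). Note that `NewWriter` discards the error from `o.norm`, so an undetectable format only surfaces later; prefer paths with recognizable extensions such as `.jsonz`:

```go
package main

import (
	"context"
	"fmt"
	"log"
	"time"

	"github.com/bsm/bfs"
	"github.com/bsm/feedx"
)

func main() {
	ctx := context.Background()
	obj := bfs.NewInMemObject("path/to/file.jsonz") // JSON + gzip, detected from the extension

	w, err := feedx.NewWriter(ctx, obj, &feedx.WriterOptions{
		LastMod: time.Unix(1515151515, 0), // stored as x-feedx-last-modified: 1515151515000
	})
	if err != nil {
		log.Fatal(err)
	}

	for _, name := range []string{"Joe", "Ann"} {
		if err := w.Encode(map[string]string{"name": name}); err != nil {
			log.Fatal(err)
		}
	}
	fmt.Println("written:", w.NumWritten()) // 2

	// Close flushes the format encoder, the compression writer and the blob writer in order.
	if err := w.Close(); err != nil {
		log.Fatal(err)
	}
}
```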
data/writer_test.go ADDED
@@ -0,0 +1,34 @@
+ package feedx_test
+
+ import (
+   "context"
+
+   "github.com/bsm/bfs"
+   . "github.com/onsi/ginkgo"
+   . "github.com/onsi/gomega"
+ )
+
+ var _ = Describe("Writer", func() {
+   var plain, compressed *bfs.Object
+   var ctx = context.Background()
+
+   BeforeEach(func() {
+     plain = bfs.NewInMemObject("path/to/file.json")
+     compressed = bfs.NewInMemObject("path/to/file.jsonz")
+   })
+
+   It("should encode", func() {
+     Expect(writeMulti(plain, 10)).To(Succeed())
+     Expect(writeMulti(compressed, 10)).To(Succeed())
+
+     info, err := plain.Head(ctx)
+     Expect(err).NotTo(HaveOccurred())
+     Expect(info.Size).To(BeNumerically("~", 470, 10))
+     Expect(info.Metadata).To(Equal(map[string]string{"x-feedx-last-modified": "1515151515123"}))
+
+     info, err = compressed.Head(ctx)
+     Expect(err).NotTo(HaveOccurred())
+     Expect(info.Size).To(BeNumerically("~", 76, 10))
+     Expect(info.Metadata).To(Equal(map[string]string{"x-feedx-last-modified": "1515151515123"}))
+   })
+ })
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: feedx
  version: !ruby/object:Gem::Version
-   version: 0.3.2
+   version: 0.4.0
  platform: ruby
  authors:
  - Black Square Media Ltd
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2018-12-14 00:00:00.000000000 Z
+ date: 2018-12-17 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: bfs
@@ -131,15 +131,20 @@ files:
  - lib/feedx/format/abstract.rb
  - lib/feedx/format/json.rb
  - lib/feedx/format/protobuf.rb
+ - lib/feedx/producer.rb
  - lib/feedx/pusher.rb
+ - reader.go
+ - reader_test.go
  - spec/feedx/compression/gzip_spec.rb
  - spec/feedx/compression/none_spec.rb
  - spec/feedx/compression_spec.rb
  - spec/feedx/format/json_spec.rb
  - spec/feedx/format/protobuf_spec.rb
  - spec/feedx/format_spec.rb
- - spec/feedx/pusher_spec.rb
+ - spec/feedx/producer_spec.rb
  - spec/spec_helper.rb
+ - writer.go
+ - writer_test.go
  homepage: https://github.com/bsm/feedx
  licenses:
  - Apache-2.0
@@ -171,5 +176,5 @@ test_files:
  - spec/feedx/format/json_spec.rb
  - spec/feedx/format/protobuf_spec.rb
  - spec/feedx/format_spec.rb
- - spec/feedx/pusher_spec.rb
+ - spec/feedx/producer_spec.rb
  - spec/spec_helper.rb