feedx 0.12.7 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +2 -37
- data/.golangci.yml +13 -4
- data/.rubocop.yml +8 -14
- data/.tool-versions +1 -0
- data/Gemfile +8 -0
- data/Gemfile.lock +54 -68
- data/Makefile +3 -3
- data/README.md +3 -1
- data/compression.go +29 -0
- data/compression_test.go +73 -61
- data/consumer.go +96 -152
- data/consumer_test.go +124 -59
- data/example_test.go +140 -0
- data/feedx.gemspec +2 -10
- data/feedx.go +16 -31
- data/feedx_ext_test.go +13 -3
- data/feedx_test.go +24 -26
- data/format.go +29 -19
- data/format_test.go +84 -56
- data/go.mod +11 -7
- data/go.sum +16 -138
- data/incremental.go +122 -0
- data/incremental_test.go +62 -0
- data/lib/feedx/cache/abstract.rb +3 -3
- data/lib/feedx/cache/value.rb +6 -6
- data/lib/feedx/compression/abstract.rb +2 -2
- data/lib/feedx/compression/gzip.rb +4 -4
- data/lib/feedx/consumer.rb +8 -8
- data/lib/feedx/format/abstract.rb +6 -6
- data/lib/feedx/format/json.rb +2 -2
- data/lib/feedx/format/protobuf.rb +6 -6
- data/lib/feedx/format.rb +1 -3
- data/lib/feedx/producer.rb +11 -11
- data/lib/feedx/stream.rb +2 -2
- data/lib/feedx.rb +2 -3
- data/manifest.go +65 -0
- data/producer.go +34 -137
- data/producer_test.go +46 -60
- data/reader.go +142 -41
- data/reader_test.go +86 -35
- data/scheduler.go +176 -0
- data/scheduler_test.go +128 -0
- data/writer.go +13 -13
- data/writer_test.go +61 -44
- metadata +12 -137
- data/.github/workflows/lint.yml +0 -18
- data/ext/parquet/decoder.go +0 -59
- data/ext/parquet/decoder_test.go +0 -88
- data/ext/parquet/encoder.go +0 -27
- data/ext/parquet/encoder_test.go +0 -70
- data/ext/parquet/go.mod +0 -12
- data/ext/parquet/go.sum +0 -193
- data/ext/parquet/parquet.go +0 -78
- data/ext/parquet/parquet_test.go +0 -28
- data/ext/parquet/testdata/alltypes_plain.parquet +0 -0
- data/lib/feedx/format/parquet.rb +0 -102
- data/spec/feedx/cache/memory_spec.rb +0 -23
- data/spec/feedx/cache/value_spec.rb +0 -19
- data/spec/feedx/compression/gzip_spec.rb +0 -17
- data/spec/feedx/compression/none_spec.rb +0 -15
- data/spec/feedx/compression_spec.rb +0 -19
- data/spec/feedx/consumer_spec.rb +0 -49
- data/spec/feedx/format/abstract_spec.rb +0 -21
- data/spec/feedx/format/json_spec.rb +0 -27
- data/spec/feedx/format/parquet_spec.rb +0 -30
- data/spec/feedx/format/protobuf_spec.rb +0 -23
- data/spec/feedx/format_spec.rb +0 -21
- data/spec/feedx/producer_spec.rb +0 -74
- data/spec/feedx/stream_spec.rb +0 -109
- data/spec/spec_helper.rb +0 -57
data/incremental.go
ADDED
@@ -0,0 +1,122 @@
+package feedx
+
+import (
+	"context"
+	"errors"
+
+	"github.com/bsm/bfs"
+)
+
+// IncrementalProduceFunc returns a ProduceFunc closure around an incremental version.
+type IncrementalProduceFunc func(remoteVersion int64) ProduceFunc
+
+// IncrementalProducer pushes incremental feeds to a remote bucket location.
+type IncrementalProducer struct {
+	bucket    bfs.Bucket
+	object    *bfs.Object
+	ownBucket bool
+}
+
+// NewIncrementalProducer inits a new incremental feed producer.
+func NewIncrementalProducer(ctx context.Context, bucketURL string) (*IncrementalProducer, error) {
+	bucket, err := bfs.Connect(ctx, bucketURL)
+	if err != nil {
+		return nil, err
+	}
+
+	pcr := NewIncrementalProducerForBucket(bucket)
+	pcr.ownBucket = true
+	return pcr, nil
+}
+
+// NewIncrementalProducerForBucket inits a new incremental feed producer for a bucket.
+func NewIncrementalProducerForBucket(bucket bfs.Bucket) *IncrementalProducer {
+	return &IncrementalProducer{
+		bucket: bucket,
+		object: bfs.NewObjectFromBucket(bucket, "manifest.json"),
+	}
+}
+
+// Close stops the producer.
+func (p *IncrementalProducer) Close() (err error) {
+	if e := p.object.Close(); e != nil {
+		err = errors.Join(err, e)
+	}
+
+	if p.ownBucket && p.bucket != nil {
+		if e := p.bucket.Close(); e != nil {
+			err = errors.Join(err, e)
+		}
+		p.bucket = nil
+	}
+	return
+}
+
+func (p *IncrementalProducer) Produce(ctx context.Context, version int64, opt *WriterOptions, pfn IncrementalProduceFunc) (*Status, error) {
+	status := Status{LocalVersion: version}
+
+	// fetch manifest from remote object
+	mft, err := loadManifest(ctx, p.object)
+	if err != nil {
+		return nil, err
+	}
+
+	// skip if not modified
+	remoteVersion := mft.Version
+	status.RemoteVersion = remoteVersion
+	if skipSync(version, remoteVersion) {
+		status.Skipped = true
+		return &status, nil
+	}
+
+	// set version for writer
+	if opt == nil {
+		opt = new(WriterOptions)
+	}
+	opt.Version = version
+
+	// write data modified since last version
+	numWritten, err := p.writeDataFile(ctx, mft, version, remoteVersion, opt, pfn)
+	if err != nil {
+		return nil, err
+	}
+	// write new manifest to remote
+	if err := p.commitManifest(ctx, mft, &WriterOptions{Version: version}); err != nil {
+		return nil, err
+	}
+
+	status.NumItems = numWritten
+	return &status, nil
+}
+
+func (p *IncrementalProducer) writeDataFile(ctx context.Context, mft *manifest, version, remoteVersion int64, opt *WriterOptions, pfn IncrementalProduceFunc) (int64, error) {
+	fname := mft.newDataFileName(opt)
+
+	obj := bfs.NewObjectFromBucket(p.bucket, fname)
+	defer obj.Close()
+
+	writer := NewWriter(ctx, obj, opt)
+	defer writer.Discard()
+
+	if err := pfn(remoteVersion)(writer); err != nil {
+		return 0, err
+	}
+	if err := writer.Commit(); err != nil {
+		return 0, err
+	}
+
+	mft.Files = append(mft.Files, fname)
+	mft.Version = version
+
+	return writer.NumWritten(), nil
+}
+
+func (p *IncrementalProducer) commitManifest(ctx context.Context, mft *manifest, opt *WriterOptions) error {
+	writer := NewWriter(ctx, p.object, opt)
+	defer writer.Discard()
+
+	if err := writer.Encode(mft); err != nil {
+		return err
+	}
+	return writer.Commit()
+}
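For orientation, here is a minimal usage sketch of the incremental API added above. It is not part of the diff: the in-memory bucket, the Record type and the version filtering are illustrative assumptions. Produce only invokes the callback when the local version is ahead of the manifest's remote version.

package main

import (
	"context"
	"log"

	"github.com/bsm/bfs"
	"github.com/bsm/feedx"
)

// Record is a hypothetical feed entry; any JSON-encodable type works.
type Record struct {
	ID      int64 `json:"id"`
	Version int64 `json:"version"`
}

func main() {
	ctx := context.Background()

	bucket := bfs.NewInMem() // any bfs.Bucket (S3, GCS, ...) would do
	defer bucket.Close()

	pcr := feedx.NewIncrementalProducerForBucket(bucket)
	defer pcr.Close()

	// localVersion would normally come from the data store, e.g. a max updated-at timestamp.
	localVersion := int64(101)

	status, err := pcr.Produce(ctx, localVersion, nil, func(remoteVersion int64) feedx.ProduceFunc {
		return func(w *feedx.Writer) error {
			// encode only records that changed after remoteVersion
			for _, rec := range []Record{{ID: 1, Version: 90}, {ID: 2, Version: 100}} {
				if rec.Version <= remoteVersion {
					continue
				}
				if err := w.Encode(&rec); err != nil {
					return err
				}
			}
			return nil
		}
	})
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("skipped=%v items=%d", status.Skipped, status.NumItems)
}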
data/incremental_test.go
ADDED
@@ -0,0 +1,62 @@
+package feedx_test
+
+import (
+	"reflect"
+	"testing"
+
+	"github.com/bsm/bfs"
+	"github.com/bsm/feedx"
+)
+
+func TestIncrementalProducer(t *testing.T) {
+	bucket := bfs.NewInMem()
+	defer bucket.Close()
+
+	pcr := feedx.NewIncrementalProducerForBucket(bucket)
+	defer pcr.Close()
+
+	// first produce
+	testIncProduce(t, pcr, 101, &feedx.Status{LocalVersion: 101, NumItems: 10})
+
+	// second produce
+	testIncProduce(t, pcr, 101, &feedx.Status{Skipped: true, LocalVersion: 101, RemoteVersion: 101})
+
+	// increment version
+	testIncProduce(t, pcr, 134, &feedx.Status{LocalVersion: 134, RemoteVersion: 101, NumItems: 3})
+
+	obj := bfs.NewObjectFromBucket(bucket, "manifest.json")
+	defer obj.Close()
+
+	mft, err := feedx.LoadManifest(t.Context(), obj)
+	if err != nil {
+		t.Fatal("unexpected error", err)
+	} else if exp := (&feedx.Manifest{
+		Version: 134,
+		Files:   []string{"data-0-101.json", "data-0-134.json"},
+	}); !reflect.DeepEqual(exp, mft) {
+		t.Errorf("expected %#v, got %#v", exp, mft)
+	}
+}
+
+func testIncProduce(t *testing.T, pcr *feedx.IncrementalProducer, version int64, exp *feedx.Status) {
+	t.Helper()
+
+	status, err := pcr.Produce(t.Context(), version, nil, func(sinceVersion int64) feedx.ProduceFunc {
+		return func(w *feedx.Writer) error {
+			n := (version - sinceVersion) / 10
+			for i := int64(0); i < n; i++ {
+				if err := w.Encode(seed()); err != nil {
+					return err
+				}
+			}
+			return nil
+		}
+	})
+	if err != nil {
+		t.Fatal("unexpected error", err)
+	}
+
+	if !reflect.DeepEqual(exp, status) {
+		t.Errorf("expected %#v, got %#v", exp, status)
	}
+}
data/lib/feedx/cache/abstract.rb
CHANGED
@@ -17,12 +17,12 @@ class Feedx::Cache::Abstract
   # Fetches data from the cache, using the given key.
   # The optional block will be evaluated and the result stored in the cache
   # in the event of a cache miss.
-  def fetch(key, **
-    value = read(key, **
+  def fetch(key, **)
+    value = read(key, **)

     if block_given?
       value ||= yield
-      write(key, value, **
+      write(key, value, **) if value
     end

     value
data/lib/feedx/cache/value.rb
CHANGED
@@ -8,18 +8,18 @@ class Feedx::Cache::Value
   end

   # Read the key.
-  def read(**
-    @cache.read(@key, **
+  def read(**)
+    @cache.read(@key, **)
   end

   # Write a value.
-  def write(value, **
-    @cache.write(@key, value, **
+  def write(value, **)
+    @cache.write(@key, value, **)
   end

   # Fetches data. The optional block will be evaluated and the
   # result stored in the cache under the key in the event of a cache miss.
-  def fetch(
-    @cache.fetch(@key,
+  def fetch(**, &)
+    @cache.fetch(@key, **, &)
   end
 end
data/lib/feedx/compression/gzip.rb
CHANGED
@@ -1,14 +1,14 @@
 require 'zlib'

 class Feedx::Compression::Gzip < Feedx::Compression::Abstract
-  def reader(io, **, &
+  def reader(io, **, &)
     force_binmode(io)
-    Zlib::GzipReader.wrap(io, &
+    Zlib::GzipReader.wrap(io, &)
   end

-  def writer(io, **, &
+  def writer(io, **, &)
     force_binmode(io)
-    Zlib::GzipWriter.wrap(io, &
+    Zlib::GzipWriter.wrap(io, &)
   end

   private
data/lib/feedx/consumer.rb
CHANGED
@@ -8,8 +8,8 @@ module Feedx
     include Enumerable

     # See constructor.
-    def self.each(url, klass,
-      new(url, klass, **
+    def self.each(url, klass, **, &)
+      new(url, klass, **).each(&)
     end

     # @param [String] url the destination URL.
@@ -17,7 +17,7 @@ module Feedx
     # @param [Hash] opts options
     # @option opts [Symbol,Class<Feedx::Format::Abstract>] :format custom formatter. Default: from file extension.
     # @option opts [Symbol,Class<Feedx::Compression::Abstract>] :compress enable compression. Default: from file extension.
-    # @option opts [Feedx::Cache::Value] :cache cache value to store remote
+    # @option opts [Feedx::Cache::Value] :cache cache value to store remote version and consume conditionally.
     def initialize(url, klass, format_options: {}, cache: nil, **opts)
       @klass = klass
       @url = url
@@ -32,19 +32,19 @@ module Feedx
     # @return [Boolean] returns true if performed.
     def each(&block)
       stream = Feedx::Stream.new(@url, **@opts)
-
+      remote_ver = nil

       if @cache
         metadata = stream.blob.info.metadata
-
-
-        return false if
+        local_ver = @cache.read.to_i
+        remote_ver = (metadata[META_VERSION] || metadata[META_VERSION_DC]).to_i
+        return false if remote_ver.positive? && remote_ver <= local_ver
       end

       stream.open do |fmt|
         fmt.decode_each(@klass, **@opts, &block)
       end
-      @cache.write(
+      @cache.write(remote_ver) if @cache && remote_ver

       true
     ensure
data/lib/feedx/format/abstract.rb
CHANGED
@@ -1,15 +1,15 @@
 class Feedx::Format::Abstract
-  def decoder(io,
-    self.class::Decoder.open(io,
+  def decoder(io, **, &)
+    self.class::Decoder.open(io, **, &)
   end

-  def encoder(io,
-    self.class::Encoder.open(io,
+  def encoder(io, **, &)
+    self.class::Encoder.open(io, **, &)
   end

   class Wrapper
-    def self.open(io, **
-      inst = new(io, **
+    def self.open(io, **)
+      inst = new(io, **)
       yield inst
     ensure
       inst&.close
data/lib/feedx/format/json.rb
CHANGED
@@ -13,8 +13,8 @@ class Feedx::Format::JSON < Feedx::Format::Abstract
   end

   class Encoder < Feedx::Format::Abstract::Encoder
-    def encode(msg, **
-      @io.write msg.to_json(**
+    def encode(msg, **)
+      @io.write msg.to_json(**) << "\n"
     end
   end
 end
data/lib/feedx/format/protobuf.rb
CHANGED
@@ -2,8 +2,8 @@ require 'pbio'

 class Feedx::Format::Protobuf < Feedx::Format::Abstract
   class Decoder < Feedx::Format::Abstract::Decoder
-    def initialize(io, **
-      super
+    def initialize(io, **)
+      super(PBIO::Delimited.new(io), **)
     end

     def decode(target, **)
@@ -12,12 +12,12 @@ class Feedx::Format::Protobuf < Feedx::Format::Abstract
   end

   class Encoder < Feedx::Format::Abstract::Encoder
-    def initialize(io, **
-      super
+    def initialize(io, **)
+      super(PBIO::Delimited.new(io), **)
     end

-    def encode(msg, **
-      msg = msg.to_pb(**
+    def encode(msg, **)
+      msg = msg.to_pb(**) if msg.respond_to?(:to_pb)
       @io.write msg
     end
   end
data/lib/feedx/format.rb
CHANGED
@@ -2,7 +2,6 @@ module Feedx
   module Format
     autoload :Abstract, 'feedx/format/abstract'
     autoload :JSON, 'feedx/format/json'
-    autoload :Parquet, 'feedx/format/parquet'
     autoload :Protobuf, 'feedx/format/protobuf'

     class << self
@@ -30,7 +29,7 @@ module Feedx
         kind = _resolve(ext[1..]) || _resolve(ext[1..-2])
         return kind if kind

-        base = base[0
+        base = base[0..(-ext.size - 1)]
       end
     end

@@ -41,7 +40,6 @@ module Feedx
       'json' => :JSON,
       'jsonl' => :JSON,
       'ndjson' => :JSON,
-      'parquet' => :Parquet,
       'pb' => :Protobuf,
       'proto' => :Protobuf,
       'protobuf' => :Protobuf,
data/lib/feedx/producer.rb
CHANGED
@@ -6,8 +6,8 @@ module Feedx
   # Produces a relation as an encoded feed to a remote location.
   class Producer
     # See constructor.
-    def self.perform(url,
-      new(url,
+    def self.perform(url, **, &)
+      new(url, **, &).perform
     end

     # @param [String] url the destination URL.
@@ -15,16 +15,16 @@ module Feedx
     # @option opts [Enumerable,ActiveRecord::Relation] :enum relation or enumerator to stream.
     # @option opts [Symbol,Class<Feedx::Format::Abstract>] :format custom formatter. Default: from file extension.
     # @option opts [Symbol,Class<Feedx::Compression::Abstract>] :compress enable compression. Default: from file extension.
-    # @option opts [
+    # @option opts [Integer] :version the most recent version, used to determine if a push is necessary.
     # @yield A block factory to generate the relation or enumerator.
     # @yieldreturn [Enumerable,ActiveRecord::Relation] the relation or enumerator to stream.
-    def initialize(url,
+    def initialize(url, version: nil, format_options: {}, enum: nil, **opts, &block)
       @enum = enum || block
       raise ArgumentError, "#{self.class.name}.new expects an :enum option or a block factory" unless @enum

       @url = url
       @opts = opts.merge(format_options)
-      @
+      @version = version

       return if format_options.empty? || (defined?(Gem::Deprecate) && Gem::Deprecate.skip)

@@ -34,18 +34,18 @@ module Feedx
     def perform
       Feedx::Stream.open(@url, **@opts) do |stream|
         enum = @enum.is_a?(Proc) ? @enum.call : @enum
-
-
+        local_ver = @version.is_a?(Proc) ? @version.call(enum) : @version
+        local_ver = local_ver.to_i

         begin
           metadata = stream.blob.info.metadata
-
-          return -1 unless
+          remote_ver = (metadata[META_VERSION] || metadata[META_VERSION_DC]).to_i
+          return -1 unless local_ver > remote_ver
         rescue BFS::FileNotFound
           nil
-        end if
+        end if local_ver.positive?

-        stream.create metadata: {
+        stream.create metadata: { META_VERSION => local_ver.to_s } do |fmt|
           iter = enum.respond_to?(:find_each) ? :find_each : :each
           enum.send(iter) {|rec| fmt.encode(rec, **@opts) }
         end
data/lib/feedx/stream.rb
CHANGED
@@ -8,8 +8,8 @@ module Feedx

     # Behaves like new, but accepts an optional block.
     # If a block is given, streams are automatically closed after the block is yielded.
-    def self.open(url, **
-      stream = new(url, **
+    def self.open(url, **)
+      stream = new(url, **)
      return stream unless block_given?

      begin
data/lib/feedx.rb
CHANGED
@@ -1,6 +1,6 @@
 module Feedx
-
-
+  META_VERSION = 'X-Feedx-Version'.freeze
+  META_VERSION_DC = META_VERSION.downcase.freeze

   autoload :Cache, 'feedx/cache'
   autoload :Compression, 'feedx/compression'
@@ -9,5 +9,4 @@ module Feedx
   autoload :Stream, 'feedx/stream'
   autoload :Producer, 'feedx/producer'
   autoload :Pusher, 'feedx/pusher'
-  autoload :TaskState, 'feedx/task_state'
 end
data/manifest.go
ADDED
@@ -0,0 +1,65 @@
+package feedx
+
+import (
+	"context"
+	"errors"
+	"strconv"
+	"strings"
+
+	"github.com/bsm/bfs"
+)
+
+// manifest holds the current feed status.
+// The current manifest is consumed before each push and a new manifest is written after each push.
+type manifest struct {
+	// Version holds the most recent version of the records included in Files.
+	Version int64 `json:"version"`
+	// Generation is an incrementing counter for use in file compaction.
+	Generation int `json:"generation"`
+	// Files holds a set of data files.
+	Files []string `json:"files"`
+}
+
+func loadManifest(ctx context.Context, obj *bfs.Object) (*manifest, error) {
+	m := new(manifest)
+
+	r, err := NewReader(ctx, obj, nil)
+	if errors.Is(err, bfs.ErrNotFound) {
+		return m, nil
+	} else if err != nil {
+		return nil, err
+	}
+	defer func() { _ = r.Close() }()
+
+	if err := r.Decode(m); errors.Is(err, bfs.ErrNotFound) { // some BFS implementations defer Open-ing the S3 object till the first Decode call
+		return m, nil
+	} else if err != nil {
+		return nil, err
+	}
+
+	return m, nil
+}
+
+func (m *manifest) newDataFileName(wopt *WriterOptions) string {
+	version := strings.ReplaceAll(strconv.FormatInt(wopt.Version, 10), ".", "")
+
+	formatExt := ".json"
+	switch wopt.Format {
+	case ProtobufFormat:
+		formatExt = ".pb"
+	case CBORFormat:
+		formatExt = ".cbor"
+	}
+
+	var compressionSuffix string
+	switch wopt.Compression {
+	case GZipCompression:
+		compressionSuffix = "z"
+	case FlateCompression:
+		compressionSuffix = ".flate"
+	case ZstdCompression:
+		compressionSuffix = ".zst"
+	}
+
+	return "data-" + strconv.Itoa(m.Generation) + "-" + version + formatExt + compressionSuffix
+}
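For reference, newDataFileName composes names as "data-" + generation + "-" + version + format extension + compression suffix; the test above expects "data-0-101.json" and "data-0-134.json" accordingly. The following standalone sketch mirrors the same scheme for illustration and is not part of feedx:

package main

import "fmt"

// dataFileName mirrors the naming rule of manifest.newDataFileName above.
func dataFileName(generation int, version int64, formatExt, compressionSuffix string) string {
	return fmt.Sprintf("data-%d-%d%s%s", generation, version, formatExt, compressionSuffix)
}

func main() {
	fmt.Println(dataFileName(0, 101, ".json", ""))   // data-0-101.json (plain JSON)
	fmt.Println(dataFileName(0, 134, ".json", "z"))  // data-0-134.jsonz (JSON + gzip)
	fmt.Println(dataFileName(1, 207, ".pb", ".zst")) // data-1-207.pb.zst (protobuf + zstd)
}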