feedx 0.12.6 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +2 -49
- data/.golangci.yml +13 -0
- data/.rubocop.yml +8 -14
- data/.tool-versions +1 -0
- data/Gemfile +8 -0
- data/Gemfile.lock +54 -69
- data/Makefile +3 -3
- data/README.md +3 -1
- data/compression.go +29 -0
- data/compression_test.go +73 -61
- data/consumer.go +96 -152
- data/consumer_test.go +124 -59
- data/example_test.go +140 -0
- data/feedx.gemspec +3 -10
- data/feedx.go +16 -31
- data/feedx_ext_test.go +13 -3
- data/feedx_test.go +24 -24
- data/format.go +29 -19
- data/format_test.go +84 -56
- data/go.mod +12 -10
- data/go.sum +18 -142
- data/incremental.go +122 -0
- data/incremental_test.go +62 -0
- data/lib/feedx/cache/abstract.rb +3 -3
- data/lib/feedx/cache/value.rb +6 -6
- data/lib/feedx/compression/abstract.rb +2 -2
- data/lib/feedx/compression/gzip.rb +4 -4
- data/lib/feedx/consumer.rb +8 -8
- data/lib/feedx/format/abstract.rb +6 -6
- data/lib/feedx/format/json.rb +2 -2
- data/lib/feedx/format/protobuf.rb +6 -6
- data/lib/feedx/format.rb +1 -3
- data/lib/feedx/producer.rb +11 -11
- data/lib/feedx/stream.rb +2 -2
- data/lib/feedx.rb +2 -3
- data/manifest.go +65 -0
- data/producer.go +34 -137
- data/producer_test.go +46 -60
- data/reader.go +142 -41
- data/reader_test.go +86 -35
- data/scheduler.go +176 -0
- data/scheduler_test.go +128 -0
- data/writer.go +13 -13
- data/writer_test.go +61 -44
- metadata +15 -137
- data/ext/parquet/decoder.go +0 -170
- data/ext/parquet/decoder_test.go +0 -88
- data/ext/parquet/go.mod +0 -10
- data/ext/parquet/go.sum +0 -154
- data/ext/parquet/parquet.go +0 -78
- data/ext/parquet/parquet_test.go +0 -28
- data/ext/parquet/reader.go +0 -89
- data/ext/parquet/testdata/alltypes_plain.parquet +0 -0
- data/ext/parquet/types.go +0 -51
- data/lib/feedx/format/parquet.rb +0 -102
- data/spec/feedx/cache/memory_spec.rb +0 -23
- data/spec/feedx/cache/value_spec.rb +0 -19
- data/spec/feedx/compression/gzip_spec.rb +0 -17
- data/spec/feedx/compression/none_spec.rb +0 -15
- data/spec/feedx/compression_spec.rb +0 -19
- data/spec/feedx/consumer_spec.rb +0 -49
- data/spec/feedx/format/abstract_spec.rb +0 -21
- data/spec/feedx/format/json_spec.rb +0 -27
- data/spec/feedx/format/parquet_spec.rb +0 -30
- data/spec/feedx/format/protobuf_spec.rb +0 -23
- data/spec/feedx/format_spec.rb +0 -21
- data/spec/feedx/producer_spec.rb +0 -74
- data/spec/feedx/stream_spec.rb +0 -109
- data/spec/spec_helper.rb +0 -57
data/ext/parquet/parquet.go
DELETED
|
@@ -1,78 +0,0 @@
|
|
|
1
|
-
package parquet
|
|
2
|
-
|
|
3
|
-
import (
|
|
4
|
-
"fmt"
|
|
5
|
-
"io"
|
|
6
|
-
"io/ioutil"
|
|
7
|
-
"os"
|
|
8
|
-
|
|
9
|
-
"github.com/bsm/feedx"
|
|
10
|
-
)
|
|
11
|
-
|
|
12
|
-
// Format is a parquet format.
|
|
13
|
-
type Format struct {
|
|
14
|
-
TempDir string
|
|
15
|
-
Columns []string // column names to include
|
|
16
|
-
BatchSize int // batch size, default: 1,000
|
|
17
|
-
}
|
|
18
|
-
|
|
19
|
-
// NewDecoder implements Format.
|
|
20
|
-
func (f *Format) NewDecoder(r io.Reader) (feedx.FormatDecoder, error) {
|
|
21
|
-
if rs, ok := r.(io.ReadSeeker); ok {
|
|
22
|
-
return newDecoder(rs, f.Columns, f.BatchSize)
|
|
23
|
-
}
|
|
24
|
-
|
|
25
|
-
tmp, err := copyToTempFile(f.TempDir, r)
|
|
26
|
-
if err != nil {
|
|
27
|
-
return nil, err
|
|
28
|
-
}
|
|
29
|
-
|
|
30
|
-
dec, err := newDecoder(tmp, f.Columns, f.BatchSize)
|
|
31
|
-
if err != nil {
|
|
32
|
-
_ = tmp.Close()
|
|
33
|
-
return nil, err
|
|
34
|
-
}
|
|
35
|
-
dec.closers = append(dec.closers, tmp)
|
|
36
|
-
return dec, nil
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
// NewEncoder implements Format.
|
|
40
|
-
func (*Format) NewEncoder(w io.Writer) (feedx.FormatEncoder, error) {
|
|
41
|
-
return nil, fmt.Errorf("not implemented")
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
// --------------------------------------------------------------------
|
|
45
|
-
|
|
46
|
-
type tempFile struct{ *os.File }
|
|
47
|
-
|
|
48
|
-
func copyToTempFile(dir string, r io.Reader) (*tempFile, error) {
|
|
49
|
-
w, err := ioutil.TempFile(dir, "feedx-ext-parquet")
|
|
50
|
-
if err != nil {
|
|
51
|
-
return nil, err
|
|
52
|
-
}
|
|
53
|
-
if _, err := io.Copy(w, r); err != nil {
|
|
54
|
-
_ = w.Close()
|
|
55
|
-
_ = os.Remove(w.Name())
|
|
56
|
-
return nil, err
|
|
57
|
-
}
|
|
58
|
-
if err := w.Close(); err != nil {
|
|
59
|
-
_ = os.Remove(w.Name())
|
|
60
|
-
return nil, err
|
|
61
|
-
}
|
|
62
|
-
|
|
63
|
-
f, err := os.Open(w.Name())
|
|
64
|
-
if err != nil {
|
|
65
|
-
_ = os.Remove(w.Name())
|
|
66
|
-
return nil, err
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
return &tempFile{File: f}, nil
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
func (f tempFile) Close() error {
|
|
73
|
-
err := f.File.Close()
|
|
74
|
-
if e := os.Remove(f.Name()); e != nil {
|
|
75
|
-
err = e
|
|
76
|
-
}
|
|
77
|
-
return err
|
|
78
|
-
}
|
data/ext/parquet/parquet_test.go
DELETED
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
package parquet_test
|
|
2
|
-
|
|
3
|
-
import (
|
|
4
|
-
"testing"
|
|
5
|
-
"time"
|
|
6
|
-
|
|
7
|
-
. "github.com/bsm/ginkgo"
|
|
8
|
-
. "github.com/bsm/gomega"
|
|
9
|
-
)
|
|
10
|
-
|
|
11
|
-
type mockStruct struct {
|
|
12
|
-
ID int `parquet:"id"`
|
|
13
|
-
Bool bool `parquet:"bool_col"`
|
|
14
|
-
TinyInt int8 `parquet:"tinyint_col"`
|
|
15
|
-
SmallUint uint16 `parquet:"smallint_col"`
|
|
16
|
-
StdInt int `parquet:"int_col"`
|
|
17
|
-
BigInt int64 `parquet:"bigint_col"`
|
|
18
|
-
Float *float32 `parquet:"float_col"`
|
|
19
|
-
Double float64 `parquet:"double_col"`
|
|
20
|
-
DateString string `parquet:"date_string_col"`
|
|
21
|
-
ByteString []byte `parquet:"string_col"`
|
|
22
|
-
Timestamp time.Time `parquet:"timestamp_col"`
|
|
23
|
-
}
|
|
24
|
-
|
|
25
|
-
func TestSuite(t *testing.T) {
|
|
26
|
-
RegisterFailHandler(Fail)
|
|
27
|
-
RunSpecs(t, "feedx/ext/parquet")
|
|
28
|
-
}
|
data/ext/parquet/reader.go
DELETED
|
@@ -1,89 +0,0 @@
|
|
|
1
|
-
package parquet
|
|
2
|
-
|
|
3
|
-
import (
|
|
4
|
-
"io"
|
|
5
|
-
|
|
6
|
-
kpq "github.com/bsm/parquet-go/parquet"
|
|
7
|
-
)
|
|
8
|
-
|
|
9
|
-
type columnReader struct {
|
|
10
|
-
file *kpq.File
|
|
11
|
-
col kpq.Column
|
|
12
|
-
rowGroup int
|
|
13
|
-
|
|
14
|
-
chunk *kpq.ColumnChunkReader
|
|
15
|
-
values []interface{}
|
|
16
|
-
dLevels []uint16
|
|
17
|
-
rLevels []uint16
|
|
18
|
-
|
|
19
|
-
n, i, vi int // chunk stats
|
|
20
|
-
}
|
|
21
|
-
|
|
22
|
-
func newColumnReader(file *kpq.File, col kpq.Column, batchSize int) *columnReader {
|
|
23
|
-
return &columnReader{
|
|
24
|
-
file: file,
|
|
25
|
-
col: col,
|
|
26
|
-
values: make([]interface{}, batchSize),
|
|
27
|
-
dLevels: make([]uint16, batchSize),
|
|
28
|
-
rLevels: make([]uint16, batchSize),
|
|
29
|
-
}
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
func (c *columnReader) Name() string {
|
|
33
|
-
return c.col.String()
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
func (c *columnReader) Next() (interface{}, error) {
|
|
37
|
-
if err := c.ensureChunk(); err != nil {
|
|
38
|
-
return nil, err
|
|
39
|
-
}
|
|
40
|
-
|
|
41
|
-
if err := c.ensureValues(); err == kpq.EndOfChunk {
|
|
42
|
-
c.chunk = nil
|
|
43
|
-
return c.Next()
|
|
44
|
-
} else if err != nil {
|
|
45
|
-
return nil, err
|
|
46
|
-
}
|
|
47
|
-
|
|
48
|
-
dLevel := c.dLevels[c.i]
|
|
49
|
-
c.i++
|
|
50
|
-
|
|
51
|
-
if notNull := dLevel == c.col.MaxD(); notNull {
|
|
52
|
-
val := c.values[c.vi]
|
|
53
|
-
c.vi++
|
|
54
|
-
return val, nil
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
return nil, nil
|
|
58
|
-
}
|
|
59
|
-
|
|
60
|
-
func (c *columnReader) ensureChunk() error {
|
|
61
|
-
if c.chunk != nil {
|
|
62
|
-
return nil
|
|
63
|
-
}
|
|
64
|
-
if c.rowGroup >= len(c.file.MetaData.RowGroups) {
|
|
65
|
-
return io.EOF
|
|
66
|
-
}
|
|
67
|
-
|
|
68
|
-
rd, err := c.file.NewReader(c.col, c.rowGroup)
|
|
69
|
-
if err != nil {
|
|
70
|
-
return err
|
|
71
|
-
}
|
|
72
|
-
c.chunk = rd
|
|
73
|
-
c.rowGroup++
|
|
74
|
-
return nil
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
func (c *columnReader) ensureValues() error {
|
|
78
|
-
if c.n != 0 && c.i < c.n {
|
|
79
|
-
return nil
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
n, err := c.chunk.Read(c.values, c.dLevels, c.rLevels)
|
|
83
|
-
if err != nil {
|
|
84
|
-
return err
|
|
85
|
-
}
|
|
86
|
-
|
|
87
|
-
c.n, c.i, c.vi = n, 0, 0
|
|
88
|
-
return nil
|
|
89
|
-
}
|
|
Binary file
|
data/ext/parquet/types.go
DELETED
|
@@ -1,51 +0,0 @@
|
|
|
1
|
-
package parquet
|
|
2
|
-
|
|
3
|
-
import (
|
|
4
|
-
"reflect"
|
|
5
|
-
"strings"
|
|
6
|
-
"sync"
|
|
7
|
-
)
|
|
8
|
-
|
|
9
|
-
type structFields map[string]int
|
|
10
|
-
|
|
11
|
-
var fieldCache sync.Map // map[reflect.Type]structFields
|
|
12
|
-
|
|
13
|
-
// cachedTypeFields is like typeFields but uses a cache to avoid repeated work.
|
|
14
|
-
//
|
|
15
|
-
// "Inspired" by https://golang.org/src/encoding/json/encode.go
|
|
16
|
-
// Copyright 2010 The Go Authors. All rights reserved.
|
|
17
|
-
func cachedTypeFields(t reflect.Type) structFields {
|
|
18
|
-
if f, ok := fieldCache.Load(t); ok {
|
|
19
|
-
return f.(structFields)
|
|
20
|
-
}
|
|
21
|
-
|
|
22
|
-
f, _ := fieldCache.LoadOrStore(t, typeFields(t))
|
|
23
|
-
return f.(structFields)
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
func tagName(tag string) string {
|
|
27
|
-
if pos := strings.Index(tag, ","); pos != -1 {
|
|
28
|
-
return tag[:pos]
|
|
29
|
-
}
|
|
30
|
-
return tag
|
|
31
|
-
}
|
|
32
|
-
|
|
33
|
-
// "Inspired" by https://golang.org/src/encoding/json/encode.go
|
|
34
|
-
// Copyright 2010 The Go Authors. All rights reserved.
|
|
35
|
-
func typeFields(t reflect.Type) structFields {
|
|
36
|
-
index := make(map[string]int, t.NumField())
|
|
37
|
-
for i := 0; i < t.NumField(); i++ {
|
|
38
|
-
field := t.Field(i)
|
|
39
|
-
tag := field.Tag.Get("parquet")
|
|
40
|
-
if tag == "-" {
|
|
41
|
-
continue
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
name := field.Name
|
|
45
|
-
if s := tagName(tag); s != "" {
|
|
46
|
-
name = s
|
|
47
|
-
}
|
|
48
|
-
index[name] = i
|
|
49
|
-
}
|
|
50
|
-
return index
|
|
51
|
-
}
|
data/lib/feedx/format/parquet.rb
DELETED
|
@@ -1,102 +0,0 @@
|
|
|
1
|
-
require 'parquet'
|
|
2
|
-
require 'tmpdir'
|
|
3
|
-
|
|
4
|
-
class Feedx::Format::Parquet < Feedx::Format::Abstract
|
|
5
|
-
class Record < Arrow::Record
|
|
6
|
-
def each_pair
|
|
7
|
-
container.columns.each do |col|
|
|
8
|
-
yield col.name, col[index]
|
|
9
|
-
end
|
|
10
|
-
end
|
|
11
|
-
end
|
|
12
|
-
|
|
13
|
-
class Decoder < Feedx::Format::Abstract::Decoder
|
|
14
|
-
def initialize(io, **)
|
|
15
|
-
super(io)
|
|
16
|
-
|
|
17
|
-
@table = read_table
|
|
18
|
-
@cursor = 0
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
def eof?
|
|
22
|
-
@cursor >= @table.n_rows
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
def decode(target, **)
|
|
26
|
-
return if eof?
|
|
27
|
-
|
|
28
|
-
rec = Record.new(@table, @cursor)
|
|
29
|
-
@cursor += 1
|
|
30
|
-
|
|
31
|
-
target = target.allocate if target.is_a?(Class)
|
|
32
|
-
target.from_parquet(rec)
|
|
33
|
-
target
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
private
|
|
37
|
-
|
|
38
|
-
def read_table
|
|
39
|
-
tmpname = ::Dir::Tmpname.create('feedx-parquet') {|path, *| path }
|
|
40
|
-
IO.copy_stream(@io, tmpname)
|
|
41
|
-
|
|
42
|
-
@table = Arrow::Table.load(tmpname, format: 'parquet')
|
|
43
|
-
ensure
|
|
44
|
-
unlink!(tmpname) if tmpname
|
|
45
|
-
end
|
|
46
|
-
|
|
47
|
-
def unlink!(tmpname)
|
|
48
|
-
File.unlink(tmpname)
|
|
49
|
-
rescue Errno::ENOENT
|
|
50
|
-
nil
|
|
51
|
-
end
|
|
52
|
-
end
|
|
53
|
-
|
|
54
|
-
class Encoder < Feedx::Format::Abstract::Encoder
|
|
55
|
-
attr_reader :schema
|
|
56
|
-
|
|
57
|
-
def initialize(io, schema:, buffer_size: 1 << 20, batch_size: 10_000)
|
|
58
|
-
super(io)
|
|
59
|
-
|
|
60
|
-
@schema = schema
|
|
61
|
-
@batch_size = batch_size.to_i
|
|
62
|
-
@buffer_size = buffer_size.to_i
|
|
63
|
-
|
|
64
|
-
@tmpname = ::Dir::Tmpname.create('feedx-parquet') {|path, *| path }
|
|
65
|
-
@output = Arrow::FileOutputStream.new(@tmpname, append: false)
|
|
66
|
-
@writer = Parquet::ArrowFileWriter.new(@schema, @output)
|
|
67
|
-
@batch = []
|
|
68
|
-
end
|
|
69
|
-
|
|
70
|
-
def encode(msg, **opts)
|
|
71
|
-
msg = msg.to_parquet(@schema, **opts) if msg.respond_to?(:to_parquet)
|
|
72
|
-
|
|
73
|
-
res = @batch.push(msg)
|
|
74
|
-
flush_table if @batch.size >= @batch_size
|
|
75
|
-
res
|
|
76
|
-
end
|
|
77
|
-
|
|
78
|
-
def close
|
|
79
|
-
flush_table unless @batch.empty?
|
|
80
|
-
|
|
81
|
-
@writer.close
|
|
82
|
-
@output.close
|
|
83
|
-
IO.copy_stream(@tmpname, @io)
|
|
84
|
-
ensure
|
|
85
|
-
unlink!
|
|
86
|
-
end
|
|
87
|
-
|
|
88
|
-
private
|
|
89
|
-
|
|
90
|
-
def flush_table
|
|
91
|
-
table = Arrow::RecordBatch.new(@schema, @batch).to_table
|
|
92
|
-
@writer.write_table table, @buffer_size
|
|
93
|
-
@batch.clear
|
|
94
|
-
end
|
|
95
|
-
|
|
96
|
-
def unlink!
|
|
97
|
-
File.unlink(@tmpname)
|
|
98
|
-
rescue Errno::ENOENT
|
|
99
|
-
nil
|
|
100
|
-
end
|
|
101
|
-
end
|
|
102
|
-
end
|
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
require 'spec_helper'
|
|
2
|
-
|
|
3
|
-
RSpec.describe Feedx::Cache::Memory do
|
|
4
|
-
it 'read/writes' do
|
|
5
|
-
expect(subject.fetch('key')).to be_nil
|
|
6
|
-
expect(subject.fetch('key') { 'value' }).to eq('value')
|
|
7
|
-
expect(subject.fetch('key')).to eq('value')
|
|
8
|
-
expect(subject.fetch('key') { 'other' }).to eq('value')
|
|
9
|
-
expect(subject.fetch('key')).to eq('value')
|
|
10
|
-
|
|
11
|
-
subject.write('key', 'new-value')
|
|
12
|
-
expect(subject.read('key')).to eq('new-value')
|
|
13
|
-
expect(subject.fetch('key')).to eq('new-value')
|
|
14
|
-
|
|
15
|
-
subject.clear
|
|
16
|
-
expect(subject.fetch('key')).to be_nil
|
|
17
|
-
end
|
|
18
|
-
|
|
19
|
-
it 'writes strings' do
|
|
20
|
-
subject.write('key', 5)
|
|
21
|
-
expect(subject.read('key')).to eq('5')
|
|
22
|
-
end
|
|
23
|
-
end
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
require 'spec_helper'
|
|
2
|
-
|
|
3
|
-
RSpec.describe Feedx::Cache::Value do
|
|
4
|
-
subject do
|
|
5
|
-
described_class.new(Feedx::Cache::Memory.new, 'key')
|
|
6
|
-
end
|
|
7
|
-
|
|
8
|
-
it 'read/writes' do
|
|
9
|
-
expect(subject.fetch).to be_nil
|
|
10
|
-
expect(subject.fetch { 'value' }).to eq('value')
|
|
11
|
-
expect(subject.fetch).to eq('value')
|
|
12
|
-
expect(subject.fetch { 'other' }).to eq('value')
|
|
13
|
-
expect(subject.fetch).to eq('value')
|
|
14
|
-
|
|
15
|
-
subject.write('new-value')
|
|
16
|
-
expect(subject.read).to eq('new-value')
|
|
17
|
-
expect(subject.fetch).to eq('new-value')
|
|
18
|
-
end
|
|
19
|
-
end
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
require 'spec_helper'
|
|
2
|
-
|
|
3
|
-
RSpec.describe Feedx::Compression::Gzip do
|
|
4
|
-
it 'wraps readers/writers' do
|
|
5
|
-
wio = StringIO.new
|
|
6
|
-
subject.writer(wio) {|w| w.write 'xyz' * 1000 }
|
|
7
|
-
expect(wio.size).to be_within(20).of(40)
|
|
8
|
-
expect(wio.string.encoding).to eq(Encoding::BINARY)
|
|
9
|
-
|
|
10
|
-
data = ''
|
|
11
|
-
StringIO.open(wio.string) do |rio|
|
|
12
|
-
subject.reader(rio) {|z| data = z.read }
|
|
13
|
-
end
|
|
14
|
-
expect(data.size).to eq(3000)
|
|
15
|
-
expect(data.encoding).to eq(Encoding.default_external)
|
|
16
|
-
end
|
|
17
|
-
end
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
require 'spec_helper'
|
|
2
|
-
|
|
3
|
-
RSpec.describe Feedx::Compression::None do
|
|
4
|
-
it 'wraps readers/writers' do
|
|
5
|
-
wio = StringIO.new
|
|
6
|
-
subject.writer(wio) {|w| w.write 'xyz' * 1000 }
|
|
7
|
-
expect(wio.size).to eq(3000)
|
|
8
|
-
|
|
9
|
-
data = ''
|
|
10
|
-
StringIO.open(wio.string) do |rio|
|
|
11
|
-
subject.reader(rio) {|z| data = z.read }
|
|
12
|
-
end
|
|
13
|
-
expect(data.size).to eq(3000)
|
|
14
|
-
end
|
|
15
|
-
end
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
require 'spec_helper'
|
|
2
|
-
|
|
3
|
-
RSpec.describe Feedx::Compression do
|
|
4
|
-
it 'resolves' do
|
|
5
|
-
expect(described_class.resolve(:gzip)).to be_instance_of(described_class::Gzip)
|
|
6
|
-
expect(described_class.resolve(:gz)).to be_instance_of(described_class::Gzip)
|
|
7
|
-
expect(described_class.resolve(nil)).to be_instance_of(described_class::None)
|
|
8
|
-
expect { described_class.resolve(:txt) }.to raise_error(/invalid compression txt/)
|
|
9
|
-
end
|
|
10
|
-
|
|
11
|
-
it 'detects' do
|
|
12
|
-
expect(described_class.detect('path/to/file.jsonz')).to be_instance_of(described_class::Gzip)
|
|
13
|
-
expect(described_class.detect('path/to/file.json.gz')).to be_instance_of(described_class::Gzip)
|
|
14
|
-
expect(described_class.detect('path/to/file.json')).to be_instance_of(described_class::None)
|
|
15
|
-
expect(described_class.detect('path/to/file.pbz')).to be_instance_of(described_class::Gzip)
|
|
16
|
-
expect(described_class.detect('path/to/file.pb.gz')).to be_instance_of(described_class::Gzip)
|
|
17
|
-
expect(described_class.detect('path/to/file.pb')).to be_instance_of(described_class::None)
|
|
18
|
-
end
|
|
19
|
-
end
|
data/spec/feedx/consumer_spec.rb
DELETED
|
@@ -1,49 +0,0 @@
|
|
|
1
|
-
require 'spec_helper'
|
|
2
|
-
|
|
3
|
-
RSpec.describe Feedx::Consumer do
|
|
4
|
-
let(:bucket) { BFS::Bucket::InMem.new }
|
|
5
|
-
let(:klass) { Feedx::TestCase::Model }
|
|
6
|
-
let(:cache) { Feedx::Cache::Memory.new.value('my-consumer') }
|
|
7
|
-
|
|
8
|
-
before { allow(BFS).to receive(:resolve).and_return(bucket) }
|
|
9
|
-
|
|
10
|
-
it 'rejects invalid inputs' do
|
|
11
|
-
expect do
|
|
12
|
-
described_class.each('mock:///dir/file.txt', klass)
|
|
13
|
-
end.to raise_error(/unable to detect format/)
|
|
14
|
-
end
|
|
15
|
-
|
|
16
|
-
it 'consumes feeds' do
|
|
17
|
-
url = mock_produce!
|
|
18
|
-
csm = described_class.new(url, klass)
|
|
19
|
-
expect(csm).to be_a(Enumerable)
|
|
20
|
-
|
|
21
|
-
cnt = csm.count do |rec|
|
|
22
|
-
expect(rec).to be_instance_of(klass)
|
|
23
|
-
true
|
|
24
|
-
end
|
|
25
|
-
expect(cnt).to eq(300)
|
|
26
|
-
end
|
|
27
|
-
|
|
28
|
-
it 'performs conditionally' do
|
|
29
|
-
url = mock_produce! last_modified: Time.at(1515151515)
|
|
30
|
-
expect(described_class.new(url, klass, cache: cache).count).to eq(300)
|
|
31
|
-
expect(described_class.new(url, klass, cache: cache).count).to eq(0)
|
|
32
|
-
|
|
33
|
-
url = mock_produce!
|
|
34
|
-
expect(described_class.new(url, klass, cache: cache).count).to eq(300)
|
|
35
|
-
expect(described_class.new(url, klass, cache: cache).count).to eq(300)
|
|
36
|
-
end
|
|
37
|
-
|
|
38
|
-
private
|
|
39
|
-
|
|
40
|
-
def mock_produce!(enum: mock_enum, **opts)
|
|
41
|
-
url = 'mock:///dir/file.json'
|
|
42
|
-
Feedx::Producer.perform url, enum: enum, **opts
|
|
43
|
-
url
|
|
44
|
-
end
|
|
45
|
-
|
|
46
|
-
def mock_enum
|
|
47
|
-
%w[x y z].map {|t| Feedx::TestCase::Model.new(t) } * 100
|
|
48
|
-
end
|
|
49
|
-
end
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
require 'spec_helper'
|
|
2
|
-
|
|
3
|
-
RSpec.describe Feedx::Format::Abstract do
|
|
4
|
-
subject { Feedx::Format::JSON.new }
|
|
5
|
-
|
|
6
|
-
let(:wio) { StringIO.new }
|
|
7
|
-
let(:rio) { StringIO.open(wio.string) }
|
|
8
|
-
|
|
9
|
-
it 'decodes each' do
|
|
10
|
-
subject.encoder wio do |enc|
|
|
11
|
-
enc.encode(Feedx::TestCase::Model.new('X'))
|
|
12
|
-
enc.encode(Feedx::TestCase::Model.new('Y'))
|
|
13
|
-
enc.encode(Feedx::TestCase::Message.new(title: 'Z'))
|
|
14
|
-
end
|
|
15
|
-
|
|
16
|
-
subject.decoder rio do |dec|
|
|
17
|
-
acc = dec.decode_each(Feedx::TestCase::Model).to_a
|
|
18
|
-
expect(acc.map(&:title)).to eq(%w[X Y Z])
|
|
19
|
-
end
|
|
20
|
-
end
|
|
21
|
-
end
|
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
require 'spec_helper'
|
|
2
|
-
|
|
3
|
-
RSpec.describe Feedx::Format::JSON do
|
|
4
|
-
let(:wio) { StringIO.new }
|
|
5
|
-
let(:rio) { StringIO.open(wio.string) }
|
|
6
|
-
|
|
7
|
-
it 'encode/decodes' do
|
|
8
|
-
subject.encoder wio do |enc|
|
|
9
|
-
enc.encode(Feedx::TestCase::Model.new('X'))
|
|
10
|
-
enc.encode(Feedx::TestCase::Model.new('Y'))
|
|
11
|
-
enc.encode(Feedx::TestCase::Message.new(title: 'Z'))
|
|
12
|
-
end
|
|
13
|
-
expect(wio.string).to eq(<<~JSON)
|
|
14
|
-
{"title":"X","updated_at":"2018-01-05 11:25:15 UTC"}
|
|
15
|
-
{"title":"Y","updated_at":"2018-01-05 11:25:15 UTC"}
|
|
16
|
-
{"title":"Z"}
|
|
17
|
-
JSON
|
|
18
|
-
|
|
19
|
-
subject.decoder rio do |dec|
|
|
20
|
-
expect(dec.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('X'))
|
|
21
|
-
expect(dec.decode(Feedx::TestCase::Model.new('O'))).to eq(Feedx::TestCase::Model.new('Y'))
|
|
22
|
-
expect(dec.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('Z'))
|
|
23
|
-
expect(dec.decode(Feedx::TestCase::Model)).to be_nil
|
|
24
|
-
expect(dec).to be_eof
|
|
25
|
-
end
|
|
26
|
-
end
|
|
27
|
-
end
|
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
require 'spec_helper'
|
|
2
|
-
|
|
3
|
-
RSpec.describe Feedx::Format::Parquet do
|
|
4
|
-
let(:wio) { StringIO.new }
|
|
5
|
-
let(:rio) { StringIO.open(wio.string) }
|
|
6
|
-
|
|
7
|
-
let(:schema) do
|
|
8
|
-
Arrow::Schema.new([
|
|
9
|
-
Arrow::Field.new('title', :string),
|
|
10
|
-
Arrow::Field.new('updated_at', type: :timestamp, unit: :second),
|
|
11
|
-
])
|
|
12
|
-
end
|
|
13
|
-
|
|
14
|
-
it 'encode/decodes' do
|
|
15
|
-
subject.encoder wio, schema: schema, batch_size: 2 do |enc|
|
|
16
|
-
enc.encode(Feedx::TestCase::Model.new('X'))
|
|
17
|
-
enc.encode(Feedx::TestCase::Model.new('Y'))
|
|
18
|
-
enc.encode(Feedx::TestCase::Model.new('Z'))
|
|
19
|
-
end
|
|
20
|
-
expect(wio.string.bytesize).to be_within(100).of(1100)
|
|
21
|
-
|
|
22
|
-
subject.decoder rio do |dec|
|
|
23
|
-
expect(dec.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('X'))
|
|
24
|
-
expect(dec.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('Y'))
|
|
25
|
-
expect(dec.decode(Feedx::TestCase::Model)).to eq(Feedx::TestCase::Model.new('Z'))
|
|
26
|
-
expect(dec.decode(Feedx::TestCase::Model)).to be_nil
|
|
27
|
-
expect(dec).to be_eof
|
|
28
|
-
end
|
|
29
|
-
end
|
|
30
|
-
end
|
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
require 'spec_helper'
|
|
2
|
-
|
|
3
|
-
RSpec.describe Feedx::Format::Protobuf do
|
|
4
|
-
let(:wio) { StringIO.new }
|
|
5
|
-
let(:rio) { StringIO.open(wio.string) }
|
|
6
|
-
|
|
7
|
-
it 'encode/decodes' do
|
|
8
|
-
subject.encoder wio do |enc|
|
|
9
|
-
enc.encode(Feedx::TestCase::Model.new('X'))
|
|
10
|
-
enc.encode(Feedx::TestCase::Model.new('Y'))
|
|
11
|
-
enc.encode(Feedx::TestCase::Message.new(title: 'Z'))
|
|
12
|
-
end
|
|
13
|
-
expect(wio.string.bytes).to eq([3, 10, 1, 88] + [3, 10, 1, 89] + [3, 10, 1, 90])
|
|
14
|
-
|
|
15
|
-
subject.decoder rio do |dec|
|
|
16
|
-
expect(dec.decode(Feedx::TestCase::Message)).to eq(Feedx::TestCase::Message.new(title: 'X'))
|
|
17
|
-
expect(dec.decode(Feedx::TestCase::Message)).to eq(Feedx::TestCase::Message.new(title: 'Y'))
|
|
18
|
-
expect(dec.decode(Feedx::TestCase::Message)).to eq(Feedx::TestCase::Message.new(title: 'Z'))
|
|
19
|
-
expect(dec.decode(Feedx::TestCase::Message)).to be_nil
|
|
20
|
-
expect(dec).to be_eof
|
|
21
|
-
end
|
|
22
|
-
end
|
|
23
|
-
end
|
data/spec/feedx/format_spec.rb
DELETED
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
require 'spec_helper'
|
|
2
|
-
|
|
3
|
-
RSpec.describe Feedx::Format do
|
|
4
|
-
it 'resolves' do
|
|
5
|
-
expect(described_class.resolve(:json)).to be_instance_of(described_class::JSON)
|
|
6
|
-
expect(described_class.resolve(:pb)).to be_instance_of(described_class::Protobuf)
|
|
7
|
-
expect { described_class.resolve(:txt) }.to raise_error(/invalid format txt/)
|
|
8
|
-
end
|
|
9
|
-
|
|
10
|
-
it 'detects' do
|
|
11
|
-
expect(described_class.detect('path/to/file.json')).to be_instance_of(described_class::JSON)
|
|
12
|
-
expect(described_class.detect('path/to/file.jsonz')).to be_instance_of(described_class::JSON)
|
|
13
|
-
expect(described_class.detect('path/to/file.json.gz')).to be_instance_of(described_class::JSON)
|
|
14
|
-
expect(described_class.detect('path/to/file.pb')).to be_instance_of(described_class::Protobuf)
|
|
15
|
-
expect(described_class.detect('path/to/file.pbz')).to be_instance_of(described_class::Protobuf)
|
|
16
|
-
expect(described_class.detect('path/to/file.pb.z')).to be_instance_of(described_class::Protobuf)
|
|
17
|
-
expect do
|
|
18
|
-
described_class.detect('path/to/file.txt')
|
|
19
|
-
end.to raise_error(/unable to detect format/)
|
|
20
|
-
end
|
|
21
|
-
end
|