feedx 0.10.0 → 0.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/.rubocop.yml +2 -0
  4. data/.travis.yml +13 -6
  5. data/Gemfile.lock +43 -20
  6. data/Makefile +5 -0
  7. data/consumer_test.go +5 -5
  8. data/feedx.gemspec +3 -2
  9. data/feedx_test.go +12 -9
  10. data/format.go +16 -16
  11. data/format_test.go +6 -7
  12. data/go.mod +5 -10
  13. data/go.sum +43 -24
  14. data/internal/testdata/testdata.pb.go +124 -0
  15. data/internal/testdata/testdata.proto +15 -0
  16. data/lib/feedx/cache/abstract.rb +2 -2
  17. data/lib/feedx/compression.rb +11 -4
  18. data/lib/feedx/compression/abstract.rb +2 -2
  19. data/lib/feedx/compression/gzip.rb +14 -2
  20. data/lib/feedx/compression/none.rb +4 -4
  21. data/lib/feedx/consumer.rb +17 -11
  22. data/lib/feedx/format.rb +18 -9
  23. data/lib/feedx/format/abstract.rb +42 -13
  24. data/lib/feedx/format/json.rb +12 -8
  25. data/lib/feedx/format/parquet.rb +102 -0
  26. data/lib/feedx/format/protobuf.rb +16 -8
  27. data/lib/feedx/producer.rb +20 -14
  28. data/lib/feedx/stream.rb +41 -25
  29. data/producer_test.go +1 -2
  30. data/reader_test.go +7 -8
  31. data/spec/feedx/compression/gzip_spec.rb +4 -2
  32. data/spec/feedx/compression/none_spec.rb +2 -2
  33. data/spec/feedx/compression_spec.rb +9 -9
  34. data/spec/feedx/consumer_spec.rb +6 -3
  35. data/spec/feedx/format/abstract_spec.rb +11 -8
  36. data/spec/feedx/format/json_spec.rb +12 -11
  37. data/spec/feedx/format/parquet_spec.rb +30 -0
  38. data/spec/feedx/format/protobuf_spec.rb +12 -11
  39. data/spec/feedx/format_spec.rb +8 -8
  40. data/spec/feedx/producer_spec.rb +6 -0
  41. data/spec/feedx/stream_spec.rb +26 -3
  42. data/spec/spec_helper.rb +17 -1
  43. data/writer_test.go +1 -1
  44. metadata +22 -3
@@ -0,0 +1,124 @@
1
+ // Code generated by protoc-gen-gogo. DO NOT EDIT.
2
+ // source: internal/testdata/testdata.proto
3
+
4
+ package testdata
5
+
6
+ import (
7
+ fmt "fmt"
8
+ proto "github.com/gogo/protobuf/proto"
9
+ math "math"
10
+ )
11
+
12
+ // Reference imports to suppress errors if they are not otherwise used.
13
+ var _ = proto.Marshal
14
+ var _ = fmt.Errorf
15
+ var _ = math.Inf
16
+
17
+ // This is a compile-time assertion to ensure that this generated file
18
+ // is compatible with the proto package it is being compiled against.
19
+ // A compilation error at this line likely means your copy of the
20
+ // proto package needs to be updated.
21
+ const _ = proto.GoGoProtoPackageIsVersion3 // please upgrade the proto package
22
+
23
+ type MockEnum int32
24
+
25
+ const (
26
+ MockEnum_UNKNOWN MockEnum = 0
27
+ MockEnum_FIRST MockEnum = 3
28
+ )
29
+
30
+ var MockEnum_name = map[int32]string{
31
+ 0: "UNKNOWN",
32
+ 3: "FIRST",
33
+ }
34
+
35
+ var MockEnum_value = map[string]int32{
36
+ "UNKNOWN": 0,
37
+ "FIRST": 3,
38
+ }
39
+
40
+ func (x MockEnum) String() string {
41
+ return proto.EnumName(MockEnum_name, int32(x))
42
+ }
43
+
44
+ func (MockEnum) EnumDescriptor() ([]byte, []int) {
45
+ return fileDescriptor_076a9f61cb4a1904, []int{0}
46
+ }
47
+
48
+ type MockMessage struct {
49
+ Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
50
+ Enum MockEnum `protobuf:"varint,2,opt,name=enum,proto3,enum=feedx.internal.testdata.MockEnum" json:"enum,omitempty"`
51
+ Height uint32 `protobuf:"varint,3,opt,name=height,proto3" json:"height,omitempty"`
52
+ XXX_NoUnkeyedLiteral struct{} `json:"-"`
53
+ XXX_unrecognized []byte `json:"-"`
54
+ XXX_sizecache int32 `json:"-"`
55
+ }
56
+
57
+ func (m *MockMessage) Reset() { *m = MockMessage{} }
58
+ func (m *MockMessage) String() string { return proto.CompactTextString(m) }
59
+ func (*MockMessage) ProtoMessage() {}
60
+ func (*MockMessage) Descriptor() ([]byte, []int) {
61
+ return fileDescriptor_076a9f61cb4a1904, []int{0}
62
+ }
63
+ func (m *MockMessage) XXX_Unmarshal(b []byte) error {
64
+ return xxx_messageInfo_MockMessage.Unmarshal(m, b)
65
+ }
66
+ func (m *MockMessage) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
67
+ return xxx_messageInfo_MockMessage.Marshal(b, m, deterministic)
68
+ }
69
+ func (m *MockMessage) XXX_Merge(src proto.Message) {
70
+ xxx_messageInfo_MockMessage.Merge(m, src)
71
+ }
72
+ func (m *MockMessage) XXX_Size() int {
73
+ return xxx_messageInfo_MockMessage.Size(m)
74
+ }
75
+ func (m *MockMessage) XXX_DiscardUnknown() {
76
+ xxx_messageInfo_MockMessage.DiscardUnknown(m)
77
+ }
78
+
79
+ var xxx_messageInfo_MockMessage proto.InternalMessageInfo
80
+
81
+ func (m *MockMessage) GetName() string {
82
+ if m != nil {
83
+ return m.Name
84
+ }
85
+ return ""
86
+ }
87
+
88
+ func (m *MockMessage) GetEnum() MockEnum {
89
+ if m != nil {
90
+ return m.Enum
91
+ }
92
+ return MockEnum_UNKNOWN
93
+ }
94
+
95
+ func (m *MockMessage) GetHeight() uint32 {
96
+ if m != nil {
97
+ return m.Height
98
+ }
99
+ return 0
100
+ }
101
+
102
+ func init() {
103
+ proto.RegisterEnum("feedx.internal.testdata.MockEnum", MockEnum_name, MockEnum_value)
104
+ proto.RegisterType((*MockMessage)(nil), "feedx.internal.testdata.MockMessage")
105
+ }
106
+
107
+ func init() { proto.RegisterFile("internal/testdata/testdata.proto", fileDescriptor_076a9f61cb4a1904) }
108
+
109
+ var fileDescriptor_076a9f61cb4a1904 = []byte{
110
+ // 199 bytes of a gzipped FileDescriptorProto
111
+ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xe2, 0x52, 0xc8, 0xcc, 0x2b, 0x49,
112
+ 0x2d, 0xca, 0x4b, 0xcc, 0xd1, 0x2f, 0x49, 0x2d, 0x2e, 0x49, 0x49, 0x2c, 0x49, 0x84, 0x33, 0xf4,
113
+ 0x0a, 0x8a, 0xf2, 0x4b, 0xf2, 0x85, 0xc4, 0xd3, 0x52, 0x53, 0x53, 0x2a, 0xf4, 0x60, 0xea, 0xf4,
114
+ 0x60, 0xd2, 0x4a, 0x05, 0x5c, 0xdc, 0xbe, 0xf9, 0xc9, 0xd9, 0xbe, 0xa9, 0xc5, 0xc5, 0x89, 0xe9,
115
+ 0xa9, 0x42, 0x42, 0x5c, 0x2c, 0x79, 0x89, 0xb9, 0xa9, 0x12, 0x8c, 0x0a, 0x8c, 0x1a, 0x9c, 0x41,
116
+ 0x60, 0xb6, 0x90, 0x29, 0x17, 0x4b, 0x6a, 0x5e, 0x69, 0xae, 0x04, 0x93, 0x02, 0xa3, 0x06, 0x9f,
117
+ 0x91, 0xa2, 0x1e, 0x0e, 0xa3, 0xf4, 0x40, 0xe6, 0xb8, 0xe6, 0x95, 0xe6, 0x06, 0x81, 0x95, 0x0b,
118
+ 0x89, 0x71, 0xb1, 0x65, 0xa4, 0x66, 0xa6, 0x67, 0x94, 0x48, 0x30, 0x2b, 0x30, 0x6a, 0xf0, 0x06,
119
+ 0x41, 0x79, 0x5a, 0x4a, 0x5c, 0x1c, 0x30, 0x95, 0x42, 0xdc, 0x5c, 0xec, 0xa1, 0x7e, 0xde, 0x7e,
120
+ 0xfe, 0xe1, 0x7e, 0x02, 0x0c, 0x42, 0x9c, 0x5c, 0xac, 0x6e, 0x9e, 0x41, 0xc1, 0x21, 0x02, 0xcc,
121
+ 0x4e, 0x2a, 0x51, 0x4a, 0xe9, 0x99, 0x25, 0x19, 0xa5, 0x49, 0x7a, 0xc9, 0xf9, 0xb9, 0xfa, 0x60,
122
+ 0x0b, 0xf5, 0x31, 0xfc, 0x98, 0xc4, 0x06, 0xf6, 0x9b, 0x31, 0x20, 0x00, 0x00, 0xff, 0xff, 0x42,
123
+ 0xf6, 0x49, 0xb3, 0xff, 0x00, 0x00, 0x00,
124
+ }
@@ -0,0 +1,15 @@
1
+ syntax = "proto3";
2
+
3
+ package feedx.internal.testdata;
4
+ option go_package = "github.com/feedx/internal/testdata";
5
+
6
+ enum MockEnum {
7
+ UNKNOWN = 0;
8
+ FIRST = 3;
9
+ }
10
+
11
+ message MockMessage {
12
+ string name = 1;
13
+ MockEnum enum = 2;
14
+ uint32 height = 3;
15
+ }
@@ -5,12 +5,12 @@ class Feedx::Cache::Abstract
5
5
  end
6
6
 
7
7
  # Read reads a key.
8
- def read(_key, **_opts)
8
+ def read(_key, **)
9
9
  raise 'Not implemented'
10
10
  end
11
11
 
12
12
  # Write writes a key/value pair.
13
- def write(_key, _value, **_opts)
13
+ def write(_key, _value, **)
14
14
  raise 'Not implemented'
15
15
  end
16
16
 
@@ -5,12 +5,19 @@ module Feedx
5
5
  autoload :Gzip, 'feedx/compression/gzip'
6
6
 
7
7
  class << self
8
+ def validate!(kind)
9
+ raise ArgumentError, "#{kind} does not implement #reader(io, &block)" unless kind.respond_to?(:reader)
10
+ raise ArgumentError, "#{kind} does not implement #writer(io, &block)" unless kind.respond_to?(:writer)
11
+
12
+ kind
13
+ end
14
+
8
15
  def resolve(name)
9
16
  case name.to_s
10
17
  when 'gz', 'gzip'
11
- Gzip
18
+ Gzip.new
12
19
  when ''
13
- None
20
+ None.new
14
21
  else
15
22
  raise ArgumentError, "invalid compression #{name}"
16
23
  end
@@ -18,9 +25,9 @@ module Feedx
18
25
 
19
26
  def detect(path)
20
27
  if File.extname(path)[-1] == 'z'
21
- Gzip
28
+ Gzip.new
22
29
  else
23
- None
30
+ None.new
24
31
  end
25
32
  end
26
33
  end
@@ -1,9 +1,9 @@
1
1
  class Feedx::Compression::Abstract
2
- def self.reader(_io, &_block)
2
+ def reader(_io, **, &_block)
3
3
  raise 'Not implemented'
4
4
  end
5
5
 
6
- def self.writer(_io, &_block)
6
+ def writer(_io, **, &_block)
7
7
  raise 'Not implemented'
8
8
  end
9
9
  end
@@ -1,11 +1,23 @@
1
1
  require 'zlib'
2
2
 
3
3
  class Feedx::Compression::Gzip < Feedx::Compression::Abstract
4
- def self.reader(io, &block)
4
+ def reader(io, **, &block)
5
+ force_binmode(io)
5
6
  Zlib::GzipReader.wrap(io, &block)
6
7
  end
7
8
 
8
- def self.writer(io, &block)
9
+ def writer(io, **, &block)
10
+ force_binmode(io)
9
11
  Zlib::GzipWriter.wrap(io, &block)
10
12
  end
13
+
14
+ private
15
+
16
+ def force_binmode(io)
17
+ if io.respond_to?(:binmode)
18
+ io.binmode
19
+ elsif io.respond_to?(:set_encoding)
20
+ io.set_encoding(Encoding::BINARY)
21
+ end
22
+ end
11
23
  end
@@ -1,9 +1,9 @@
1
1
  class Feedx::Compression::None < Feedx::Compression::Abstract
2
- def self.reader(io, &block)
3
- block.call(io)
2
+ def reader(io, **)
3
+ yield(io)
4
4
  end
5
5
 
6
- def self.writer(io, &block)
7
- block.call(io)
6
+ def writer(io, **)
7
+ yield(io)
8
8
  end
9
9
  end
@@ -8,41 +8,47 @@ module Feedx
8
8
  include Enumerable
9
9
 
10
10
  # See constructor.
11
- def self.each(url, klass, opts = {}, &block)
12
- new(url, klass, opts).each(&block)
11
+ def self.each(url, klass, **opts, &block)
12
+ new(url, klass, **opts).each(&block)
13
13
  end
14
14
 
15
15
  # @param [String] url the destination URL.
16
16
  # @param [Class] klass the record class.
17
17
  # @param [Hash] opts options
18
18
  # @option opts [Symbol,Class<Feedx::Format::Abstract>] :format custom formatter. Default: from file extension.
19
- # @option opts [Hash] :format_options format decode options. Default: {}.
20
19
  # @option opts [Symbol,Class<Feedx::Compression::Abstract>] :compress enable compression. Default: from file extension.
21
20
  # @option opts [Feedx::Cache::Value] :cache cache value to store remote last modified time and consume conditionally.
22
- def initialize(url, klass, opts = {})
23
- @klass = klass
24
- @stream = Feedx::Stream.new(url, opts)
25
- @fmt_opts = opts[:format_options] || {}
26
- @cache = opts[:cache]
21
+ def initialize(url, klass, format_options: {}, cache: nil, **opts)
22
+ @klass = klass
23
+ @url = url
24
+ @opts = opts.merge(format_options)
25
+ @cache = cache
26
+
27
+ return if format_options.empty? || (defined?(Gem::Deprecate) && Gem::Deprecate.skip)
28
+
29
+ warn "WARNING: passing format_options is deprecated; pass the options inline instead (called from #{caller(2..2).first})."
27
30
  end
28
31
 
29
32
  # @return [Boolean] returns true if performed.
30
33
  def each(&block)
34
+ stream = Feedx::Stream.new(@url, **@opts)
31
35
  remote_rev = nil
32
36
 
33
37
  if @cache
34
- metadata = @stream.blob.info.metadata
38
+ metadata = stream.blob.info.metadata
35
39
  local_rev = @cache.read.to_i
36
40
  remote_rev = (metadata[META_LAST_MODIFIED] || metadata[META_LAST_MODIFIED_DC]).to_i
37
41
  return false if remote_rev.positive? && remote_rev <= local_rev
38
42
  end
39
43
 
40
- @stream.open do |fmt|
41
- fmt.decode_each(@klass, **@fmt_opts, &block)
44
+ stream.open do |fmt|
45
+ fmt.decode_each(@klass, **@opts, &block)
42
46
  end
43
47
  @cache.write(remote_rev) if @cache && remote_rev
44
48
 
45
49
  true
50
+ ensure
51
+ stream&.close
46
52
  end
47
53
  end
48
54
  end
@@ -2,13 +2,19 @@ module Feedx
2
2
  module Format
3
3
  autoload :Abstract, 'feedx/format/abstract'
4
4
  autoload :JSON, 'feedx/format/json'
5
+ autoload :Parquet, 'feedx/format/parquet'
5
6
  autoload :Protobuf, 'feedx/format/protobuf'
6
7
 
7
8
  class << self
8
- def register(ext, kind)
9
- raise ArgumentError, "#{kind} is not a subclass of Feedx::Format::Abstract" unless kind.is_a?(Class) && kind < Abstract
9
+ def validate!(kind)
10
+ raise ArgumentError, "#{kind} does not implement #encoder(io, &block)" unless kind.respond_to?(:encoder)
11
+ raise ArgumentError, "#{kind} does not implement #decoder(io, &block)" unless kind.respond_to?(:decoder)
12
+
13
+ kind
14
+ end
10
15
 
11
- registry[ext.to_s] = kind
16
+ def register(ext, kind)
17
+ registry[ext.to_s] = validate!(kind)
12
18
  end
13
19
 
14
20
  def resolve(name)
@@ -33,6 +39,9 @@ module Feedx
33
39
  def registry
34
40
  @registry ||= {
35
41
  'json' => :JSON,
42
+ 'jsonl' => :JSON,
43
+ 'ndjson' => :JSON,
44
+ 'parquet' => :Parquet,
36
45
  'pb' => :Protobuf,
37
46
  'proto' => :Protobuf,
38
47
  'protobuf' => :Protobuf,
@@ -40,13 +49,13 @@ module Feedx
40
49
  end
41
50
 
42
51
  def _resolve(name)
43
- name = name.to_s
44
- klass = registry[name]
45
- if klass.is_a?(Symbol)
46
- klass = const_get(klass)
47
- registry[name.to_s] = klass
52
+ name = name.to_s
53
+ kind = registry[name]
54
+ if kind.is_a?(Symbol)
55
+ kind = const_get(kind).new
56
+ registry[name.to_s] = kind
48
57
  end
49
- klass
58
+ kind
50
59
  end
51
60
  end
52
61
  end
@@ -1,25 +1,54 @@
1
1
  class Feedx::Format::Abstract
2
- def initialize(io)
3
- @io = io
2
+ def decoder(io, **opts, &block)
3
+ self.class::Decoder.open(io, **opts, &block)
4
4
  end
5
5
 
6
- def eof?
7
- @io.eof?
6
+ def encoder(io, **opts, &block)
7
+ self.class::Encoder.open(io, **opts, &block)
8
8
  end
9
9
 
10
- def decode_each(klass, **opts)
11
- if block_given?
12
- yield decode(klass, **opts) until eof?
13
- else
14
- Enumerator.new {|y| y << decode(klass, **opts) until eof? }
10
+ class Wrapper
11
+ def self.open(io, **opts)
12
+ inst = new(io, **opts)
13
+ yield inst
14
+ ensure
15
+ inst&.close
16
+ end
17
+
18
+ def initialize(io, **)
19
+ @io = io
15
20
  end
16
21
  end
17
22
 
18
- def decode(_klass, **)
19
- raise 'Not implemented'
23
+ class Decoder < Wrapper
24
+ def eof?
25
+ @io.eof?
26
+ end
27
+
28
+ def decode_each(target, **opts)
29
+ if block_given?
30
+ yield decode(target, **opts) until eof?
31
+ else
32
+ Enumerator.new do |acc|
33
+ acc << decode(target, **opts) until eof?
34
+ end
35
+ end
36
+ end
37
+
38
+ def decode(_target, **)
39
+ raise 'Not implemented'
40
+ end
41
+
42
+ def close; end
20
43
  end
21
44
 
22
- def encode(_msg, **)
23
- raise 'Not implemented'
45
+ class Encoder < Wrapper
46
+ def encode(_msg, **)
47
+ raise 'Not implemented'
48
+ end
49
+
50
+ def close
51
+ @io.flush if @io.respond_to?(:flush)
52
+ end
24
53
  end
25
54
  end
@@ -1,16 +1,20 @@
1
1
  require 'json'
2
2
 
3
3
  class Feedx::Format::JSON < Feedx::Format::Abstract
4
- def decode(obj, **)
5
- line = @io.gets
6
- return unless line
4
+ class Decoder < Feedx::Format::Abstract::Decoder
5
+ def decode(target, **)
6
+ line = @io.gets
7
+ return unless line
7
8
 
8
- obj = obj.allocate if obj.is_a?(Class)
9
- obj.from_json(line)
10
- obj
9
+ target = target.allocate if target.is_a?(Class)
10
+ target.from_json(line)
11
+ target
12
+ end
11
13
  end
12
14
 
13
- def encode(msg, **opts)
14
- @io.write msg.to_json(**opts) << "\n"
15
+ class Encoder < Feedx::Format::Abstract::Encoder
16
+ def encode(msg, **opts)
17
+ @io.write msg.to_json(**opts) << "\n"
18
+ end
15
19
  end
16
20
  end