feedx 0.10.2 → 0.12.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/.editorconfig +3 -0
  3. data/.gitignore +1 -0
  4. data/.rubocop.yml +2 -0
  5. data/.travis.yml +12 -2
  6. data/Gemfile +0 -2
  7. data/Gemfile.lock +50 -30
  8. data/Makefile +10 -5
  9. data/compression.go +18 -0
  10. data/compression_test.go +12 -0
  11. data/consumer_test.go +5 -4
  12. data/ext/parquet/decoder.go +170 -0
  13. data/ext/parquet/decoder_test.go +88 -0
  14. data/ext/parquet/go.mod +12 -0
  15. data/ext/parquet/go.sum +134 -0
  16. data/ext/parquet/parquet.go +78 -0
  17. data/ext/parquet/parquet_test.go +28 -0
  18. data/ext/parquet/reader.go +89 -0
  19. data/ext/parquet/testdata/alltypes_plain.parquet +0 -0
  20. data/ext/parquet/types.go +51 -0
  21. data/feedx.gemspec +3 -2
  22. data/feedx_test.go +8 -24
  23. data/format.go +50 -20
  24. data/format_test.go +8 -6
  25. data/go.mod +9 -11
  26. data/go.sum +76 -28
  27. data/internal/testdata/testdata.pb.go +223 -0
  28. data/internal/testdata/testdata.proto +15 -0
  29. data/lib/feedx/cache/abstract.rb +2 -2
  30. data/lib/feedx/cache/memory.rb +1 -0
  31. data/lib/feedx/compression.rb +11 -4
  32. data/lib/feedx/compression/abstract.rb +2 -2
  33. data/lib/feedx/compression/gzip.rb +14 -16
  34. data/lib/feedx/compression/none.rb +4 -4
  35. data/lib/feedx/consumer.rb +15 -9
  36. data/lib/feedx/format.rb +18 -9
  37. data/lib/feedx/format/abstract.rb +42 -13
  38. data/lib/feedx/format/json.rb +12 -8
  39. data/lib/feedx/format/parquet.rb +102 -0
  40. data/lib/feedx/format/protobuf.rb +16 -8
  41. data/lib/feedx/producer.rb +27 -22
  42. data/lib/feedx/stream.rb +36 -23
  43. data/producer_test.go +1 -2
  44. data/reader_test.go +6 -6
  45. data/spec/feedx/compression/gzip_spec.rb +2 -2
  46. data/spec/feedx/compression/none_spec.rb +2 -2
  47. data/spec/feedx/compression_spec.rb +9 -9
  48. data/spec/feedx/consumer_spec.rb +1 -1
  49. data/spec/feedx/format/abstract_spec.rb +11 -8
  50. data/spec/feedx/format/json_spec.rb +17 -16
  51. data/spec/feedx/format/parquet_spec.rb +30 -0
  52. data/spec/feedx/format/protobuf_spec.rb +12 -11
  53. data/spec/feedx/format_spec.rb +8 -8
  54. data/spec/feedx/producer_spec.rb +6 -0
  55. data/spec/feedx/stream_spec.rb +43 -6
  56. data/spec/spec_helper.rb +17 -1
  57. metadata +33 -5
@@ -0,0 +1,88 @@
1
+ package parquet_test
2
+
3
+ import (
4
+ "bytes"
5
+ "io"
6
+ "io/ioutil"
7
+ "os"
8
+ "time"
9
+
10
+ "github.com/bsm/feedx"
11
+ "github.com/bsm/feedx/ext/parquet"
12
+ . "github.com/onsi/ginkgo"
13
+ . "github.com/onsi/gomega"
14
+ )
15
+
16
+ var _ = Describe("Decoder", func() {
17
+ var subject feedx.FormatDecoder
18
+ var fixture *os.File
19
+
20
+ f32ptr := func(f float32) *float32 { return &f }
21
+
22
+ BeforeEach(func() {
23
+ var err error
24
+ fixture, err = os.Open("testdata/alltypes_plain.parquet")
25
+ Expect(err).NotTo(HaveOccurred())
26
+
27
+ format := &parquet.Format{BatchSize: 3}
28
+ subject, err = format.NewDecoder(fixture)
29
+ Expect(err).NotTo(HaveOccurred())
30
+ })
31
+
32
+ AfterEach(func() {
33
+ Expect(subject.Close()).To(Succeed())
34
+ Expect(fixture.Close()).To(Succeed())
35
+ })
36
+
37
+ It("should decode", func() {
38
+ v1 := new(mockStruct)
39
+ Expect(subject.Decode(v1)).To(Succeed())
40
+ Expect(v1).To(Equal(&mockStruct{
41
+ ID: 4,
42
+ Bool: true,
43
+ Float: f32ptr(0),
44
+ DateString: "03/01/09", ByteString: []byte("0"),
45
+ Timestamp: time.Unix(1235865600, 0),
46
+ }))
47
+
48
+ v2 := new(mockStruct)
49
+ Expect(subject.Decode(v2)).To(Succeed())
50
+ Expect(v2).To(Equal(&mockStruct{
51
+ ID: 5,
52
+ TinyInt: 1, SmallUint: 1, StdInt: 1, BigInt: 10,
53
+ Float: f32ptr(1.1), Double: 10.1,
54
+ DateString: "03/01/09", ByteString: []byte("1"),
55
+ Timestamp: time.Unix(1235865660, 0),
56
+ }))
57
+
58
+ Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v3
59
+ Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v4
60
+ Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v5
61
+
62
+ v6 := new(mockStruct)
63
+ Expect(subject.Decode(v6)).To(Succeed())
64
+ Expect(v6).To(Equal(&mockStruct{
65
+ ID: 3,
66
+ Bool: false,
67
+ TinyInt: 1, SmallUint: 1, StdInt: 1, BigInt: 10,
68
+ Float: f32ptr(1.1), Double: 10.1,
69
+ DateString: "02/01/09", ByteString: []byte("1"),
70
+ Timestamp: time.Unix(1233446460, 0),
71
+ }))
72
+
73
+ Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v7
74
+ Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v8
75
+
76
+ v9 := new(mockStruct)
77
+ Expect(subject.Decode(v9)).To(MatchError(io.EOF))
78
+ })
79
+
80
+ It("should open from non-file readers", func() {
81
+ bin, err := ioutil.ReadFile("testdata/alltypes_plain.parquet")
82
+ Expect(err).NotTo(HaveOccurred())
83
+
84
+ dec, err := new(parquet.Format).NewDecoder(bytes.NewReader(bin))
85
+ Expect(err).NotTo(HaveOccurred())
86
+ Expect(dec.Close()).To(Succeed())
87
+ })
88
+ })
@@ -0,0 +1,12 @@
1
+ module github.com/bsm/feedx/ext/parquet
2
+
3
+ go 1.15
4
+
5
+ replace github.com/bsm/feedx => ../../
6
+
7
+ require (
8
+ github.com/bsm/feedx v0.12.2
9
+ github.com/bsm/parquet-go v0.0.0-20200904113737-792434824f00
10
+ github.com/onsi/ginkgo v1.14.1
11
+ github.com/onsi/gomega v1.10.2
12
+ )
@@ -0,0 +1,134 @@
1
+ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
2
+ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
3
+ github.com/bmatcuk/doublestar v1.2.2/go.mod h1:wiQtGV+rzVYxB7WIlirSN++5HPtPlXEo9MEoZQC/PmE=
4
+ github.com/bmatcuk/doublestar v1.3.2 h1:mzUncgFmpzNUhIITFqGdZ8nUU0O7JTJzRO8VdkeLCSo=
5
+ github.com/bmatcuk/doublestar v1.3.2/go.mod h1:wiQtGV+rzVYxB7WIlirSN++5HPtPlXEo9MEoZQC/PmE=
6
+ github.com/bsm/bfs v0.10.4 h1:59I1FBEcIku/1MfPyIEeBfKm+ICaJ4lVEcago/YeCLg=
7
+ github.com/bsm/bfs v0.10.4/go.mod h1:N3md8kQvlteRDcfc8tqw759yW98dhj+6seWEVcg4CmM=
8
+ github.com/bsm/parquet-go v0.0.0-20200904113737-792434824f00 h1:seSUCN95Kgv0mxmBUxx99faZYYQB4+TbE1Do6E5ZwI4=
9
+ github.com/bsm/parquet-go v0.0.0-20200904113737-792434824f00/go.mod h1:7EU1rAae6Y67NlxAbr+Mbxxxc5AFGcxZCSW/vbybDLs=
10
+ github.com/bsm/pbio v0.2.2 h1:Xdj5hQkS0K3kKc1NY6hoSWMvzpq0Mk5j8vcc7irslno=
11
+ github.com/bsm/pbio v0.2.2/go.mod h1:3O4XQFoarlYalkGd+zMFfEUyalP8NBKkQ0Ta4IwhN4w=
12
+ github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
13
+ github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
14
+ github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
15
+ github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
16
+ github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
17
+ github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4=
18
+ github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
19
+ github.com/gogo/protobuf v1.3.1 h1:DqDEcV5aeaTmdFBePNpYsp3FlcVH/2ISVVM9Qf8PSls=
20
+ github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o=
21
+ github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
22
+ github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
23
+ github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
24
+ github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
25
+ github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
26
+ github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
27
+ github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
28
+ github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
29
+ github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
30
+ github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
31
+ github.com/golang/protobuf v1.4.2 h1:+Z5KGCizgyZCbGh1KZqA0fcLLkwbsjIzS4aV2v7wJX0=
32
+ github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
33
+ github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=
34
+ github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
35
+ github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
36
+ github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
37
+ github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
38
+ github.com/google/go-cmp v0.4.0 h1:xsAVV57WRhGj6kEIi8ReJzQlHHqcBYCElAvkovg3B/4=
39
+ github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
40
+ github.com/google/go-cmp v0.5.0 h1:/QaMHBdZ26BB3SSst0Iwl10Epc+xhTquomWX0oZEB6w=
41
+ github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
42
+ github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
43
+ github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00=
44
+ github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
45
+ github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
46
+ github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
47
+ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
48
+ github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
49
+ github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
50
+ github.com/nxadm/tail v1.4.4 h1:DQuhQpB1tVlglWS2hLQ5OV6B5r8aGxSrPc5Qo6uTN78=
51
+ github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A=
52
+ github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
53
+ github.com/onsi/ginkgo v1.8.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
54
+ github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk=
55
+ github.com/onsi/ginkgo v1.14.1 h1:jMU0WaQrP0a/YAEq8eJmJKjBoMs+pClEr1vDMlM/Do4=
56
+ github.com/onsi/ginkgo v1.14.1/go.mod h1:iSB4RoI2tjJc9BBv4NKIKWKya62Rps+oPG/Lv9klQyY=
57
+ github.com/onsi/gomega v1.5.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
58
+ github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY=
59
+ github.com/onsi/gomega v1.10.1 h1:o0+MgICZLuZ7xjH7Vx6zS/zcu93/BEp1VwkIW1mEXCE=
60
+ github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo=
61
+ github.com/onsi/gomega v1.10.2 h1:aY/nuoWlKJud2J6U0E3NWsjlg+0GtwXxgEqthRdzlcs=
62
+ github.com/onsi/gomega v1.10.2/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo=
63
+ github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
64
+ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
65
+ golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
66
+ golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
67
+ golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
68
+ golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
69
+ golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
70
+ golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
71
+ golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
72
+ golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
73
+ golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
74
+ golang.org/x/net v0.0.0-20190813141303-74dc4d7220e7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
75
+ golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7 h1:AeiKBIuRw3UomYXSbLy0Mc2dDLfdtbT/IVn4keq83P0=
76
+ golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
77
+ golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
78
+ golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
79
+ golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
80
+ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
81
+ golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
82
+ golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
83
+ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
84
+ golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
85
+ golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
86
+ golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
87
+ golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
88
+ golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
89
+ golang.org/x/sys v0.0.0-20200519105757-fe76b779f299 h1:DYfZAGf2WMFjMxbgTjaC+2HC7NkNAQs+6Q8b9WEB/F4=
90
+ golang.org/x/sys v0.0.0-20200519105757-fe76b779f299/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
91
+ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
92
+ golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
93
+ golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
94
+ golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
95
+ golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
96
+ golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
97
+ golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
98
+ golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
99
+ golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
100
+ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
101
+ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
102
+ google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
103
+ google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
104
+ google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
105
+ google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
106
+ google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
107
+ google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
108
+ google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
109
+ google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
110
+ google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
111
+ google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
112
+ google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
113
+ google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
114
+ google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
115
+ google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
116
+ google.golang.org/protobuf v1.23.0 h1:4MY060fB1DLGMB/7MBTLnwQUY6+F09GEiz6SsrNqyzM=
117
+ google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
118
+ google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
119
+ google.golang.org/protobuf v1.25.0 h1:Ejskq+SyPohKW+1uil0JJMtmHCgJPJ/qWTxr8qp+R4c=
120
+ google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
121
+ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
122
+ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
123
+ gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
124
+ gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
125
+ gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
126
+ gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
127
+ gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
128
+ gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
129
+ gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
130
+ gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
131
+ gopkg.in/yaml.v2 v2.3.0 h1:clyUAQHOM3G0M3f5vQj7LuJrETvjVot3Z5el9nffUtU=
132
+ gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
133
+ honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
134
+ honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
@@ -0,0 +1,78 @@
1
+ package parquet
2
+
3
+ import (
4
+ "fmt"
5
+ "io"
6
+ "io/ioutil"
7
+ "os"
8
+
9
+ "github.com/bsm/feedx"
10
+ )
11
+
12
// Format is the parquet format. Its fields configure how decoders created
// by NewDecoder read their input.
type Format struct {
	// TempDir is the directory in which a non-seekable input is buffered to
	// a temporary file before decoding. It is handed to ioutil.TempFile, so
	// an empty value selects the default directory for temporary files.
	TempDir string

	// Columns holds the column names to include.
	Columns []string

	// BatchSize is the batch size, default: 1,000.
	BatchSize int
}
18
+
19
+ // NewDecoder implements Format.
20
+ func (f *Format) NewDecoder(r io.Reader) (feedx.FormatDecoder, error) {
21
+ if rs, ok := r.(io.ReadSeeker); ok {
22
+ return newDecoder(rs, f.Columns, f.BatchSize)
23
+ }
24
+
25
+ tmp, err := copyToTempFile(f.TempDir, r)
26
+ if err != nil {
27
+ return nil, err
28
+ }
29
+
30
+ dec, err := newDecoder(tmp, f.Columns, f.BatchSize)
31
+ if err != nil {
32
+ _ = tmp.Close()
33
+ return nil, err
34
+ }
35
+ dec.closers = append(dec.closers, tmp)
36
+ return dec, nil
37
+ }
38
+
39
+ // NewEncoder implements Format.
40
+ func (*Format) NewEncoder(w io.Writer) (feedx.FormatEncoder, error) {
41
+ return nil, fmt.Errorf("not implemented")
42
+ }
43
+
44
+ // --------------------------------------------------------------------
45
+
46
// tempFile wraps an *os.File whose Close also deletes the file from disk.
type tempFile struct{ *os.File }

// copyToTempFile copies everything readable from r into a new temporary file
// and returns that file reopened for reading.
//
// The file is created by ioutil.TempFile in dir with the name prefix
// "feedx-ext-parquet"; an empty dir selects the default directory for
// temporary files. If any step fails the temporary file is removed again and
// the error of the failing step is returned.
func copyToTempFile(dir string, r io.Reader) (*tempFile, error) {
	w, err := ioutil.TempFile(dir, "feedx-ext-parquet")
	if err != nil {
		return nil, err
	}
	name := w.Name()

	if _, err := io.Copy(w, r); err != nil {
		// Best-effort cleanup; the copy error is the one worth reporting.
		_ = w.Close()
		_ = os.Remove(name)
		return nil, err
	}
	if err := w.Close(); err != nil {
		_ = os.Remove(name) // best-effort cleanup
		return nil, err
	}

	// Reopen the file so the caller gets a handle positioned at the start.
	f, err := os.Open(name)
	if err != nil {
		_ = os.Remove(name) // best-effort cleanup
		return nil, err
	}

	return &tempFile{File: f}, nil
}

// Close closes the underlying file and removes it from disk.
//
// Removal is attempted even when closing fails. If both steps fail, the
// close error is returned, so the first failure is not hidden by the
// second; otherwise whichever error occurred (if any) is returned.
func (f tempFile) Close() error {
	closeErr := f.File.Close()
	removeErr := os.Remove(f.Name())
	if closeErr != nil {
		return closeErr
	}
	return removeErr
}
@@ -0,0 +1,28 @@
1
+ package parquet_test
2
+
3
+ import (
4
+ "testing"
5
+ "time"
6
+
7
+ . "github.com/onsi/ginkgo"
8
+ . "github.com/onsi/gomega"
9
+ )
10
+
11
// mockStruct is the decode target used by the specs. Each field names the
// fixture column it is bound to through its parquet tag.
type mockStruct struct {
	// Boolean and integer fields.
	ID        int    `parquet:"id"`
	Bool      bool   `parquet:"bool_col"`
	TinyInt   int8   `parquet:"tinyint_col"`
	SmallUint uint16 `parquet:"smallint_col"`
	StdInt    int    `parquet:"int_col"`
	BigInt    int64  `parquet:"bigint_col"`

	// Floating-point fields; Float is held through a pointer.
	Float  *float32 `parquet:"float_col"`
	Double float64  `parquet:"double_col"`

	// String, byte-slice and time fields.
	DateString string    `parquet:"date_string_col"`
	ByteString []byte    `parquet:"string_col"`
	Timestamp  time.Time `parquet:"timestamp_col"`
}
24
+
25
+ func TestSuite(t *testing.T) {
26
+ RegisterFailHandler(Fail)
27
+ RunSpecs(t, "feedx/ext/parquet")
28
+ }
@@ -0,0 +1,89 @@
1
+ package parquet
2
+
3
+ import (
4
+ "io"
5
+
6
+ kpq "github.com/bsm/parquet-go/parquet"
7
+ )
8
+
9
+ type columnReader struct {
10
+ file *kpq.File
11
+ col kpq.Column
12
+ rowGroup int
13
+
14
+ chunk *kpq.ColumnChunkReader
15
+ values []interface{}
16
+ dLevels []uint16
17
+ rLevels []uint16
18
+
19
+ n, i, vi int // chunk stats
20
+ }
21
+
22
+ func newColumnReader(file *kpq.File, col kpq.Column, batchSize int) *columnReader {
23
+ return &columnReader{
24
+ file: file,
25
+ col: col,
26
+ values: make([]interface{}, batchSize),
27
+ dLevels: make([]uint16, batchSize),
28
+ rLevels: make([]uint16, batchSize),
29
+ }
30
+ }
31
+
32
+ func (c *columnReader) Name() string {
33
+ return c.col.String()
34
+ }
35
+
36
+ func (c *columnReader) Next() (interface{}, error) {
37
+ if err := c.ensureChunk(); err != nil {
38
+ return nil, err
39
+ }
40
+
41
+ if err := c.ensureValues(); err == kpq.EndOfChunk {
42
+ c.chunk = nil
43
+ return c.Next()
44
+ } else if err != nil {
45
+ return nil, err
46
+ }
47
+
48
+ dLevel := c.dLevels[c.i]
49
+ c.i++
50
+
51
+ if notNull := dLevel == c.col.MaxD(); notNull {
52
+ val := c.values[c.vi]
53
+ c.vi++
54
+ return val, nil
55
+ }
56
+
57
+ return nil, nil
58
+ }
59
+
60
+ func (c *columnReader) ensureChunk() error {
61
+ if c.chunk != nil {
62
+ return nil
63
+ }
64
+ if c.rowGroup >= len(c.file.MetaData.RowGroups) {
65
+ return io.EOF
66
+ }
67
+
68
+ rd, err := c.file.NewReader(c.col, c.rowGroup)
69
+ if err != nil {
70
+ return err
71
+ }
72
+ c.chunk = rd
73
+ c.rowGroup++
74
+ return nil
75
+ }
76
+
77
+ func (c *columnReader) ensureValues() error {
78
+ if c.n != 0 && c.i < c.n {
79
+ return nil
80
+ }
81
+
82
+ n, err := c.chunk.Read(c.values, c.dLevels, c.rLevels)
83
+ if err != nil {
84
+ return err
85
+ }
86
+
87
+ c.n, c.i, c.vi = n, 0, 0
88
+ return nil
89
+ }
@@ -0,0 +1,51 @@
1
+ package parquet
2
+
3
+ import (
4
+ "reflect"
5
+ "strings"
6
+ "sync"
7
+ )
8
+
9
// structFields maps a column name to the index of the struct field bound to
// it.
type structFields map[string]int

var fieldCache sync.Map // map[reflect.Type]structFields

// cachedTypeFields is like typeFields but uses a cache to avoid repeated work.
//
// "Inspired" by https://golang.org/src/encoding/json/encode.go
// Copyright 2010 The Go Authors. All rights reserved.
func cachedTypeFields(t reflect.Type) structFields {
	if f, ok := fieldCache.Load(t); ok {
		return f.(structFields)
	}

	// LoadOrStore keeps the first stored result if another goroutine stored
	// one for the same type in the meantime.
	f, _ := fieldCache.LoadOrStore(t, typeFields(t))
	return f.(structFields)
}

// tagName returns the part of tag before the first comma, or the whole tag
// when it contains no comma.
func tagName(tag string) string {
	if pos := strings.Index(tag, ","); pos != -1 {
		return tag[:pos]
	}
	return tag
}

// typeFields builds the name-to-field-index map for the struct type t.
//
// A field is keyed by the name part of its `parquet` tag, or by its Go field
// name when the tag is missing or has an empty name. Fields tagged "-" are
// left out, and so are unexported fields, which reflection cannot assign to.
// When two fields resolve to the same name the later one wins.
//
// t must be a struct type; reflect panics otherwise.
//
// "Inspired" by https://golang.org/src/encoding/json/encode.go
// Copyright 2010 The Go Authors. All rights reserved.
func typeFields(t reflect.Type) structFields {
	n := t.NumField()
	index := make(structFields, n)
	for i := 0; i < n; i++ {
		field := t.Field(i)
		if field.PkgPath != "" {
			// PkgPath is only set for unexported fields.
			continue
		}

		tag := field.Tag.Get("parquet")
		if tag == "-" {
			continue
		}

		name := field.Name
		if s := tagName(tag); s != "" {
			name = s
		}
		index[name] = i
	}
	return index
}