feedx 0.10.2 → 0.12.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/.editorconfig +3 -0
  3. data/.gitignore +1 -0
  4. data/.rubocop.yml +2 -0
  5. data/.travis.yml +12 -2
  6. data/Gemfile +0 -2
  7. data/Gemfile.lock +50 -30
  8. data/Makefile +10 -5
  9. data/compression.go +18 -0
  10. data/compression_test.go +12 -0
  11. data/consumer_test.go +5 -4
  12. data/ext/parquet/decoder.go +170 -0
  13. data/ext/parquet/decoder_test.go +88 -0
  14. data/ext/parquet/go.mod +12 -0
  15. data/ext/parquet/go.sum +134 -0
  16. data/ext/parquet/parquet.go +78 -0
  17. data/ext/parquet/parquet_test.go +28 -0
  18. data/ext/parquet/reader.go +89 -0
  19. data/ext/parquet/testdata/alltypes_plain.parquet +0 -0
  20. data/ext/parquet/types.go +51 -0
  21. data/feedx.gemspec +3 -2
  22. data/feedx_test.go +8 -24
  23. data/format.go +50 -20
  24. data/format_test.go +8 -6
  25. data/go.mod +9 -11
  26. data/go.sum +76 -28
  27. data/internal/testdata/testdata.pb.go +223 -0
  28. data/internal/testdata/testdata.proto +15 -0
  29. data/lib/feedx/cache/abstract.rb +2 -2
  30. data/lib/feedx/cache/memory.rb +1 -0
  31. data/lib/feedx/compression.rb +11 -4
  32. data/lib/feedx/compression/abstract.rb +2 -2
  33. data/lib/feedx/compression/gzip.rb +14 -16
  34. data/lib/feedx/compression/none.rb +4 -4
  35. data/lib/feedx/consumer.rb +15 -9
  36. data/lib/feedx/format.rb +18 -9
  37. data/lib/feedx/format/abstract.rb +42 -13
  38. data/lib/feedx/format/json.rb +12 -8
  39. data/lib/feedx/format/parquet.rb +102 -0
  40. data/lib/feedx/format/protobuf.rb +16 -8
  41. data/lib/feedx/producer.rb +27 -22
  42. data/lib/feedx/stream.rb +36 -23
  43. data/producer_test.go +1 -2
  44. data/reader_test.go +6 -6
  45. data/spec/feedx/compression/gzip_spec.rb +2 -2
  46. data/spec/feedx/compression/none_spec.rb +2 -2
  47. data/spec/feedx/compression_spec.rb +9 -9
  48. data/spec/feedx/consumer_spec.rb +1 -1
  49. data/spec/feedx/format/abstract_spec.rb +11 -8
  50. data/spec/feedx/format/json_spec.rb +17 -16
  51. data/spec/feedx/format/parquet_spec.rb +30 -0
  52. data/spec/feedx/format/protobuf_spec.rb +12 -11
  53. data/spec/feedx/format_spec.rb +8 -8
  54. data/spec/feedx/producer_spec.rb +6 -0
  55. data/spec/feedx/stream_spec.rb +43 -6
  56. data/spec/spec_helper.rb +17 -1
  57. metadata +33 -5
@@ -0,0 +1,88 @@
1
+ package parquet_test
2
+
3
+ import (
4
+ "bytes"
5
+ "io"
6
+ "io/ioutil"
7
+ "os"
8
+ "time"
9
+
10
+ "github.com/bsm/feedx"
11
+ "github.com/bsm/feedx/ext/parquet"
12
+ . "github.com/onsi/ginkgo"
13
+ . "github.com/onsi/gomega"
14
+ )
15
+
16
+ var _ = Describe("Decoder", func() {
17
+ var subject feedx.FormatDecoder
18
+ var fixture *os.File
19
+
20
+ f32ptr := func(f float32) *float32 { return &f }
21
+
22
+ BeforeEach(func() {
23
+ var err error
24
+ fixture, err = os.Open("testdata/alltypes_plain.parquet")
25
+ Expect(err).NotTo(HaveOccurred())
26
+
27
+ format := &parquet.Format{BatchSize: 3}
28
+ subject, err = format.NewDecoder(fixture)
29
+ Expect(err).NotTo(HaveOccurred())
30
+ })
31
+
32
+ AfterEach(func() {
33
+ Expect(subject.Close()).To(Succeed())
34
+ Expect(fixture.Close()).To(Succeed())
35
+ })
36
+
37
+ It("should decode", func() {
38
+ v1 := new(mockStruct)
39
+ Expect(subject.Decode(v1)).To(Succeed())
40
+ Expect(v1).To(Equal(&mockStruct{
41
+ ID: 4,
42
+ Bool: true,
43
+ Float: f32ptr(0),
44
+ DateString: "03/01/09", ByteString: []byte("0"),
45
+ Timestamp: time.Unix(1235865600, 0),
46
+ }))
47
+
48
+ v2 := new(mockStruct)
49
+ Expect(subject.Decode(v2)).To(Succeed())
50
+ Expect(v2).To(Equal(&mockStruct{
51
+ ID: 5,
52
+ TinyInt: 1, SmallUint: 1, StdInt: 1, BigInt: 10,
53
+ Float: f32ptr(1.1), Double: 10.1,
54
+ DateString: "03/01/09", ByteString: []byte("1"),
55
+ Timestamp: time.Unix(1235865660, 0),
56
+ }))
57
+
58
+ Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v3
59
+ Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v4
60
+ Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v5
61
+
62
+ v6 := new(mockStruct)
63
+ Expect(subject.Decode(v6)).To(Succeed())
64
+ Expect(v6).To(Equal(&mockStruct{
65
+ ID: 3,
66
+ Bool: false,
67
+ TinyInt: 1, SmallUint: 1, StdInt: 1, BigInt: 10,
68
+ Float: f32ptr(1.1), Double: 10.1,
69
+ DateString: "02/01/09", ByteString: []byte("1"),
70
+ Timestamp: time.Unix(1233446460, 0),
71
+ }))
72
+
73
+ Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v7
74
+ Expect(subject.Decode(new(mockStruct))).To(Succeed()) // v8
75
+
76
+ v9 := new(mockStruct)
77
+ Expect(subject.Decode(v9)).To(MatchError(io.EOF))
78
+ })
79
+
80
+ It("should open from non-file readers", func() {
81
+ bin, err := ioutil.ReadFile("testdata/alltypes_plain.parquet")
82
+ Expect(err).NotTo(HaveOccurred())
83
+
84
+ dec, err := new(parquet.Format).NewDecoder(bytes.NewReader(bin))
85
+ Expect(err).NotTo(HaveOccurred())
86
+ Expect(dec.Close()).To(Succeed())
87
+ })
88
+ })
@@ -0,0 +1,12 @@
1
+ module github.com/bsm/feedx/ext/parquet
2
+
3
+ go 1.15
4
+
5
+ replace github.com/bsm/feedx => ../../
6
+
7
+ require (
8
+ github.com/bsm/feedx v0.12.2
9
+ github.com/bsm/parquet-go v0.0.0-20200904113737-792434824f00
10
+ github.com/onsi/ginkgo v1.14.1
11
+ github.com/onsi/gomega v1.10.2
12
+ )
@@ -0,0 +1,134 @@
1
+ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
2
+ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
3
+ github.com/bmatcuk/doublestar v1.2.2/go.mod h1:wiQtGV+rzVYxB7WIlirSN++5HPtPlXEo9MEoZQC/PmE=
4
+ github.com/bmatcuk/doublestar v1.3.2 h1:mzUncgFmpzNUhIITFqGdZ8nUU0O7JTJzRO8VdkeLCSo=
5
+ github.com/bmatcuk/doublestar v1.3.2/go.mod h1:wiQtGV+rzVYxB7WIlirSN++5HPtPlXEo9MEoZQC/PmE=
6
+ github.com/bsm/bfs v0.10.4 h1:59I1FBEcIku/1MfPyIEeBfKm+ICaJ4lVEcago/YeCLg=
7
+ github.com/bsm/bfs v0.10.4/go.mod h1:N3md8kQvlteRDcfc8tqw759yW98dhj+6seWEVcg4CmM=
8
+ github.com/bsm/parquet-go v0.0.0-20200904113737-792434824f00 h1:seSUCN95Kgv0mxmBUxx99faZYYQB4+TbE1Do6E5ZwI4=
9
+ github.com/bsm/parquet-go v0.0.0-20200904113737-792434824f00/go.mod h1:7EU1rAae6Y67NlxAbr+Mbxxxc5AFGcxZCSW/vbybDLs=
10
+ github.com/bsm/pbio v0.2.2 h1:Xdj5hQkS0K3kKc1NY6hoSWMvzpq0Mk5j8vcc7irslno=
11
+ github.com/bsm/pbio v0.2.2/go.mod h1:3O4XQFoarlYalkGd+zMFfEUyalP8NBKkQ0Ta4IwhN4w=
12
+ github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
13
+ github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
14
+ github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
15
+ github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
16
+ github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
17
+ github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4=
18
+ github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
19
+ github.com/gogo/protobuf v1.3.1 h1:DqDEcV5aeaTmdFBePNpYsp3FlcVH/2ISVVM9Qf8PSls=
20
+ github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o=
21
+ github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
22
+ github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
23
+ github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
24
+ github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
25
+ github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
26
+ github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
27
+ github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
28
+ github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
29
+ github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
30
+ github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
31
+ github.com/golang/protobuf v1.4.2 h1:+Z5KGCizgyZCbGh1KZqA0fcLLkwbsjIzS4aV2v7wJX0=
32
+ github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
33
+ github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=
34
+ github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
35
+ github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
36
+ github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
37
+ github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
38
+ github.com/google/go-cmp v0.4.0 h1:xsAVV57WRhGj6kEIi8ReJzQlHHqcBYCElAvkovg3B/4=
39
+ github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
40
+ github.com/google/go-cmp v0.5.0 h1:/QaMHBdZ26BB3SSst0Iwl10Epc+xhTquomWX0oZEB6w=
41
+ github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
42
+ github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
43
+ github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00=
44
+ github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
45
+ github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
46
+ github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
47
+ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
48
+ github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
49
+ github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
50
+ github.com/nxadm/tail v1.4.4 h1:DQuhQpB1tVlglWS2hLQ5OV6B5r8aGxSrPc5Qo6uTN78=
51
+ github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A=
52
+ github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
53
+ github.com/onsi/ginkgo v1.8.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
54
+ github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk=
55
+ github.com/onsi/ginkgo v1.14.1 h1:jMU0WaQrP0a/YAEq8eJmJKjBoMs+pClEr1vDMlM/Do4=
56
+ github.com/onsi/ginkgo v1.14.1/go.mod h1:iSB4RoI2tjJc9BBv4NKIKWKya62Rps+oPG/Lv9klQyY=
57
+ github.com/onsi/gomega v1.5.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
58
+ github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY=
59
+ github.com/onsi/gomega v1.10.1 h1:o0+MgICZLuZ7xjH7Vx6zS/zcu93/BEp1VwkIW1mEXCE=
60
+ github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo=
61
+ github.com/onsi/gomega v1.10.2 h1:aY/nuoWlKJud2J6U0E3NWsjlg+0GtwXxgEqthRdzlcs=
62
+ github.com/onsi/gomega v1.10.2/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo=
63
+ github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
64
+ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
65
+ golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
66
+ golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
67
+ golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
68
+ golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
69
+ golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
70
+ golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
71
+ golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
72
+ golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
73
+ golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
74
+ golang.org/x/net v0.0.0-20190813141303-74dc4d7220e7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
75
+ golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7 h1:AeiKBIuRw3UomYXSbLy0Mc2dDLfdtbT/IVn4keq83P0=
76
+ golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
77
+ golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
78
+ golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
79
+ golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
80
+ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
81
+ golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
82
+ golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
83
+ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
84
+ golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
85
+ golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
86
+ golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
87
+ golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
88
+ golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
89
+ golang.org/x/sys v0.0.0-20200519105757-fe76b779f299 h1:DYfZAGf2WMFjMxbgTjaC+2HC7NkNAQs+6Q8b9WEB/F4=
90
+ golang.org/x/sys v0.0.0-20200519105757-fe76b779f299/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
91
+ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
92
+ golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
93
+ golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
94
+ golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
95
+ golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
96
+ golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
97
+ golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
98
+ golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
99
+ golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
100
+ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
101
+ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
102
+ google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
103
+ google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
104
+ google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
105
+ google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
106
+ google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
107
+ google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
108
+ google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
109
+ google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
110
+ google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
111
+ google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
112
+ google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
113
+ google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
114
+ google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
115
+ google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
116
+ google.golang.org/protobuf v1.23.0 h1:4MY060fB1DLGMB/7MBTLnwQUY6+F09GEiz6SsrNqyzM=
117
+ google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
118
+ google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
119
+ google.golang.org/protobuf v1.25.0 h1:Ejskq+SyPohKW+1uil0JJMtmHCgJPJ/qWTxr8qp+R4c=
120
+ google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
121
+ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
122
+ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
123
+ gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
124
+ gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
125
+ gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
126
+ gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
127
+ gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
128
+ gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
129
+ gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
130
+ gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
131
+ gopkg.in/yaml.v2 v2.3.0 h1:clyUAQHOM3G0M3f5vQj7LuJrETvjVot3Z5el9nffUtU=
132
+ gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
133
+ honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
134
+ honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
@@ -0,0 +1,78 @@
1
+ package parquet
2
+
3
+ import (
4
+ "fmt"
5
+ "io"
6
+ "io/ioutil"
7
+ "os"
8
+
9
+ "github.com/bsm/feedx"
10
+ )
11
+
12
// Format is the parquet feed format.
type Format struct {
	// TempDir is the directory handed to copyToTempFile when NewDecoder has
	// to spool a non-seekable reader to disk.
	TempDir string

	// Columns lists the column names to include.
	Columns []string

	// BatchSize is the batch size, default: 1,000.
	BatchSize int
}
18
+
19
+ // NewDecoder implements Format.
20
+ func (f *Format) NewDecoder(r io.Reader) (feedx.FormatDecoder, error) {
21
+ if rs, ok := r.(io.ReadSeeker); ok {
22
+ return newDecoder(rs, f.Columns, f.BatchSize)
23
+ }
24
+
25
+ tmp, err := copyToTempFile(f.TempDir, r)
26
+ if err != nil {
27
+ return nil, err
28
+ }
29
+
30
+ dec, err := newDecoder(tmp, f.Columns, f.BatchSize)
31
+ if err != nil {
32
+ _ = tmp.Close()
33
+ return nil, err
34
+ }
35
+ dec.closers = append(dec.closers, tmp)
36
+ return dec, nil
37
+ }
38
+
39
+ // NewEncoder implements Format.
40
+ func (*Format) NewEncoder(w io.Writer) (feedx.FormatEncoder, error) {
41
+ return nil, fmt.Errorf("not implemented")
42
+ }
43
+
44
+ // --------------------------------------------------------------------
45
+
46
// tempFile is an open file that deletes itself from disk when closed.
type tempFile struct{ *os.File }

// copyToTempFile writes everything read from r into a new temporary file
// created in dir, then re-opens that file for reading and returns it.
//
// If any step after creation fails, the temporary file is removed before the
// error is returned.
func copyToTempFile(dir string, r io.Reader) (*tempFile, error) {
	out, err := ioutil.TempFile(dir, "feedx-ext-parquet")
	if err != nil {
		return nil, err
	}
	path := out.Name()

	// discard deletes the partially prepared file (best effort) and reports cause.
	discard := func(cause error) (*tempFile, error) {
		_ = os.Remove(path)
		return nil, cause
	}

	// The write handle is closed whether or not the copy succeeded; a copy
	// error takes precedence over a close error.
	_, copyErr := io.Copy(out, r)
	closeErr := out.Close()
	if copyErr != nil {
		return discard(copyErr)
	}
	if closeErr != nil {
		return discard(closeErr)
	}

	in, err := os.Open(path)
	if err != nil {
		return discard(err)
	}
	return &tempFile{File: in}, nil
}

// Close closes the underlying file and then removes it from disk. A removal
// error takes precedence over a close error.
func (f tempFile) Close() error {
	closeErr := f.File.Close()
	if removeErr := os.Remove(f.Name()); removeErr != nil {
		return removeErr
	}
	return closeErr
}
@@ -0,0 +1,28 @@
1
+ package parquet_test
2
+
3
+ import (
4
+ "testing"
5
+ "time"
6
+
7
+ . "github.com/onsi/ginkgo"
8
+ . "github.com/onsi/gomega"
9
+ )
10
+
11
// mockStruct is the decode target used by the specs. Each field carries a
// `parquet` tag naming the column it is bound to.
type mockStruct struct {
	ID   int  `parquet:"id"`
	Bool bool `parquet:"bool_col"`

	// Integer columns, decoded into Go integers of differing width and sign.
	TinyInt   int8   `parquet:"tinyint_col"`
	SmallUint uint16 `parquet:"smallint_col"`
	StdInt    int    `parquet:"int_col"`
	BigInt    int64  `parquet:"bigint_col"`

	// Floating point columns; Float is a pointer target.
	Float  *float32 `parquet:"float_col"`
	Double float64  `parquet:"double_col"`

	// Textual, binary and temporal columns.
	DateString string    `parquet:"date_string_col"`
	ByteString []byte    `parquet:"string_col"`
	Timestamp  time.Time `parquet:"timestamp_col"`
}
24
+
25
+ func TestSuite(t *testing.T) {
26
+ RegisterFailHandler(Fail)
27
+ RunSpecs(t, "feedx/ext/parquet")
28
+ }
@@ -0,0 +1,89 @@
1
+ package parquet
2
+
3
+ import (
4
+ "io"
5
+
6
+ kpq "github.com/bsm/parquet-go/parquet"
7
+ )
8
+
9
+ type columnReader struct {
10
+ file *kpq.File
11
+ col kpq.Column
12
+ rowGroup int
13
+
14
+ chunk *kpq.ColumnChunkReader
15
+ values []interface{}
16
+ dLevels []uint16
17
+ rLevels []uint16
18
+
19
+ n, i, vi int // chunk stats
20
+ }
21
+
22
+ func newColumnReader(file *kpq.File, col kpq.Column, batchSize int) *columnReader {
23
+ return &columnReader{
24
+ file: file,
25
+ col: col,
26
+ values: make([]interface{}, batchSize),
27
+ dLevels: make([]uint16, batchSize),
28
+ rLevels: make([]uint16, batchSize),
29
+ }
30
+ }
31
+
32
+ func (c *columnReader) Name() string {
33
+ return c.col.String()
34
+ }
35
+
36
+ func (c *columnReader) Next() (interface{}, error) {
37
+ if err := c.ensureChunk(); err != nil {
38
+ return nil, err
39
+ }
40
+
41
+ if err := c.ensureValues(); err == kpq.EndOfChunk {
42
+ c.chunk = nil
43
+ return c.Next()
44
+ } else if err != nil {
45
+ return nil, err
46
+ }
47
+
48
+ dLevel := c.dLevels[c.i]
49
+ c.i++
50
+
51
+ if notNull := dLevel == c.col.MaxD(); notNull {
52
+ val := c.values[c.vi]
53
+ c.vi++
54
+ return val, nil
55
+ }
56
+
57
+ return nil, nil
58
+ }
59
+
60
+ func (c *columnReader) ensureChunk() error {
61
+ if c.chunk != nil {
62
+ return nil
63
+ }
64
+ if c.rowGroup >= len(c.file.MetaData.RowGroups) {
65
+ return io.EOF
66
+ }
67
+
68
+ rd, err := c.file.NewReader(c.col, c.rowGroup)
69
+ if err != nil {
70
+ return err
71
+ }
72
+ c.chunk = rd
73
+ c.rowGroup++
74
+ return nil
75
+ }
76
+
77
+ func (c *columnReader) ensureValues() error {
78
+ if c.n != 0 && c.i < c.n {
79
+ return nil
80
+ }
81
+
82
+ n, err := c.chunk.Read(c.values, c.dLevels, c.rLevels)
83
+ if err != nil {
84
+ return err
85
+ }
86
+
87
+ c.n, c.i, c.vi = n, 0, 0
88
+ return nil
89
+ }
@@ -0,0 +1,51 @@
1
+ package parquet
2
+
3
+ import (
4
+ "reflect"
5
+ "strings"
6
+ "sync"
7
+ )
8
+
9
// structFields maps a column name to the index of the struct field bound to it.
type structFields map[string]int

// fieldCache memoises the result of typeFields per struct type.
// It behaves like a map[reflect.Type]structFields.
var fieldCache sync.Map
12
+
13
+ // cachedTypeFields is like typeFields but uses a cache to avoid repeated work.
14
+ //
15
+ // "Inspired" by https://golang.org/src/encoding/json/encode.go
16
+ // Copyright 2010 The Go Authors. All rights reserved.
17
+ func cachedTypeFields(t reflect.Type) structFields {
18
+ if f, ok := fieldCache.Load(t); ok {
19
+ return f.(structFields)
20
+ }
21
+
22
+ f, _ := fieldCache.LoadOrStore(t, typeFields(t))
23
+ return f.(structFields)
24
+ }
25
+
26
// tagName returns the part of tag that precedes the first comma, or the
// whole tag when it contains no comma.
func tagName(tag string) string {
	comma := strings.IndexByte(tag, ',')
	if comma < 0 {
		return tag
	}
	return tag[:comma]
}
32
+
33
+ // "Inspired" by https://golang.org/src/encoding/json/encode.go
34
+ // Copyright 2010 The Go Authors. All rights reserved.
35
+ func typeFields(t reflect.Type) structFields {
36
+ index := make(map[string]int, t.NumField())
37
+ for i := 0; i < t.NumField(); i++ {
38
+ field := t.Field(i)
39
+ tag := field.Tag.Get("parquet")
40
+ if tag == "-" {
41
+ continue
42
+ }
43
+
44
+ name := field.Name
45
+ if s := tagName(tag); s != "" {
46
+ name = s
47
+ }
48
+ index[name] = i
49
+ }
50
+ return index
51
+ }