embulk-filter-ruby_proc 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +12 -1
- data/embulk-filter-ruby_proc.gemspec +1 -1
- data/example/config.yml +5 -1
- data/lib/embulk/filter/ruby_proc.rb +30 -10
- metadata +2 -2
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: b9a7bffd90b4924602a9fc0378f77b781ab05376
         | 
| 4 | 
            +
              data.tar.gz: 48134bc5631972efc8286573543dbfb9d312abab
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: c708ef11682d2b028b4f8adb70b41cb56b25ed63ef6575ffed9e7e4ab09d221d33e43eccde221923582924a342e57a4fea1e9cb4260838f98b9fc4e0dbf22647
         | 
| 7 | 
            +
              data.tar.gz: 583880f5a19829ed75ac4c11ef34c55ae12c5a1291f2a414890d9ce4f7a4e9de7c78a627e96d12a0807b5f5bd2f01a93112e37819c3d65f0b9c1f3c484a78ded
         | 
    
        data/README.md
    CHANGED
    
    | @@ -32,6 +32,11 @@ filters: | |
| 32 32 | 
             
              - type: ruby_proc
         | 
| 33 33 | 
             
                requires:
         | 
| 34 34 | 
             
                  - cgi
         | 
| 35 | 
            +
                rows:
         | 
| 36 | 
            +
                  - proc: |
         | 
| 37 | 
            +
                      ->(record) do
         | 
| 38 | 
            +
                        [record.dup, record.dup.tap { |r| r["id"] += 10 }]
         | 
| 39 | 
            +
                      end
         | 
| 35 40 | 
             
                columns:
         | 
| 36 41 | 
             
                  - name: data
         | 
| 37 42 | 
             
                    proc: |
         | 
| @@ -51,7 +56,6 @@ filters: | |
| 51 56 | 
             
                    proc_file: comment_upcase.rb
         | 
| 52 57 | 
             
                    skip_nil: false
         | 
| 53 58 | 
             
                    type: json
         | 
| 54 | 
            -
                target: events
         | 
| 55 59 |  | 
| 56 60 | 
             
            # ...
         | 
| 57 61 |  | 
| @@ -66,15 +70,22 @@ filters: | |
| 66 70 | 
             
            end
         | 
| 67 71 | 
             
            ```
         | 
| 68 72 |  | 
| 73 | 
            +
            The rows proc must return an array of record hashes.
         | 
| 74 | 
            +
            The user must also take care of object identity (for example, by returning distinct hash objects via `dup`, as in the example above); otherwise, errors may occur when the plugin applies the column procs.
         | 
| 75 | 
            +
             | 
| 69 76 | 
             
            ### preview
         | 
| 70 77 | 
             
            ```
         | 
| 71 78 | 
             
            +-----------+--------------+-------------------------+-------------------------+------------------------------------------+------------------------------------------------------------------------------------------+
         | 
| 72 79 | 
             
            | id:string | account:long |          time:timestamp |      purchase:timestamp |                             comment:json |                                                                                data:json |
         | 
| 73 80 | 
             
            +-----------+--------------+-------------------------+-------------------------+------------------------------------------+------------------------------------------------------------------------------------------+
         | 
| 74 81 | 
             
            |         2 |       32,864 | 2015-01-27 19:23:49 UTC | 2015-01-27 00:00:00 UTC |                               ["EMBULK"] | {"events":[{"id":1,"name":"Name1","idx":0},{"id":2,"name":"Name2","idx":1}],"foo":"bar"} |
         | 
| 82 | 
            +
            |        22 |       32,864 | 2015-01-27 19:23:49 UTC | 2015-01-27 00:00:00 UTC |                               ["EMBULK"] | {"events":[{"id":1,"name":"Name1","idx":0},{"id":2,"name":"Name2","idx":1}],"foo":"bar"} |
         | 
| 75 83 | 
             
            |         4 |       14,824 | 2015-01-27 19:01:23 UTC | 2015-01-27 00:00:00 UTC |                       ["EMBULK","JRUBY"] |                                                                                          |
         | 
| 84 | 
            +
            |        24 |       14,824 | 2015-01-27 19:01:23 UTC | 2015-01-27 00:00:00 UTC |                       ["EMBULK","JRUBY"] |                                                                                          |
         | 
| 76 85 | 
             
            |         6 |       27,559 | 2015-01-28 02:20:02 UTC | 2015-01-28 00:00:00 UTC | ["EMBULK","%22CSV%22","PARSER","PLUGIN"] |                                                                                          |
         | 
| 86 | 
            +
            |        26 |       27,559 | 2015-01-28 02:20:02 UTC | 2015-01-28 00:00:00 UTC | ["EMBULK","%22CSV%22","PARSER","PLUGIN"] |                                                                                          |
         | 
| 77 87 | 
             
            |         8 |       11,270 | 2015-01-29 11:54:36 UTC | 2015-01-29 00:00:00 UTC |                                ["11270"] |                                                                                          |
         | 
| 88 | 
            +
            |        28 |       11,270 | 2015-01-29 11:54:36 UTC | 2015-01-29 00:00:00 UTC |                                ["11270"] |                                                                                          |
         | 
| 78 89 | 
             
            +-----------+--------------+-------------------------+-------------------------+------------------------------------------+------------------------------------------------------------------------------------------+
         | 
| 79 90 | 
             
            ```
         | 
| 80 91 |  | 
    
        data/example/config.yml
    CHANGED
    
    | @@ -25,6 +25,11 @@ filters: | |
| 25 25 | 
             
              - type: ruby_proc
         | 
| 26 26 | 
             
                requires:
         | 
| 27 27 | 
             
                  - cgi
         | 
| 28 | 
            +
                rows:
         | 
| 29 | 
            +
                  - proc: |
         | 
| 30 | 
            +
                      ->(record) do
         | 
| 31 | 
            +
                        [record.dup, record.dup.tap { |r| r["id"] += 10 }]
         | 
| 32 | 
            +
                      end
         | 
| 28 33 | 
             
                columns:
         | 
| 29 34 | 
             
                  - name: data
         | 
| 30 35 | 
             
                    proc: |
         | 
| @@ -44,7 +49,6 @@ filters: | |
| 44 49 | 
             
                    proc_file: comment_upcase.rb
         | 
| 45 50 | 
             
                    skip_nil: false
         | 
| 46 51 | 
             
                    type: json
         | 
| 47 | 
            -
                target: events
         | 
| 48 52 |  | 
| 49 53 | 
             
            out:
         | 
| 50 54 | 
             
              type: file
         | 
| @@ -6,7 +6,8 @@ module Embulk | |
| 6 6 |  | 
| 7 7 | 
             
                  def self.transaction(config, in_schema, &control)
         | 
| 8 8 | 
             
                    task = {
         | 
| 9 | 
            -
                      "columns" => config.param("columns", :array),
         | 
| 9 | 
            +
                      "columns" => config.param("columns", :array, default: []),
         | 
| 10 | 
            +
                      "rows" => config.param("rows", :array, default: []),
         | 
| 10 11 | 
             
                      "requires" => config.param("requires", :array, default: []),
         | 
| 11 12 | 
             
                    }
         | 
| 12 13 |  | 
| @@ -27,6 +28,7 @@ module Embulk | |
| 27 28 | 
             
                    task["requires"].each do |lib|
         | 
| 28 29 | 
             
                      require lib
         | 
| 29 30 | 
             
                    end
         | 
| 31 | 
            +
             | 
| 30 32 | 
             
                    @procs = Hash[task["columns"].map {|col|
         | 
| 31 33 | 
             
                      if col["proc"]
         | 
| 32 34 | 
             
                        [col["name"], eval(col["proc"])]
         | 
| @@ -34,6 +36,15 @@ module Embulk | |
| 34 36 | 
             
                        [col["name"], eval(File.read(col["proc_file"]), binding, File.expand_path(col["proc_file"]))]
         | 
| 35 37 | 
             
                      end
         | 
| 36 38 | 
             
                    }]
         | 
| 39 | 
            +
                    @row_procs = task["rows"].map {|rowdef|
         | 
| 40 | 
            +
                      if rowdef["proc"]
         | 
| 41 | 
            +
                        eval(rowdef["proc"])
         | 
| 42 | 
            +
                      else
         | 
| 43 | 
            +
                        eval(File.read(rowdef["proc_file"]), binding, File.expand_path(rowdef["proc_file"]))
         | 
| 44 | 
            +
                      end
         | 
| 45 | 
            +
                    }.compact
         | 
| 46 | 
            +
                    raise "Need columns or rows parameter" if @row_procs.empty? && @procs.empty?
         | 
| 47 | 
            +
             | 
| 37 48 | 
             
                    @skip_nils = Hash[task["columns"].map {|col|
         | 
| 38 49 | 
             
                      [col["name"], col["skip_nil"].nil? ? true : !!col["skip_nil"]]
         | 
| 39 50 | 
             
                    }]
         | 
| @@ -44,18 +55,27 @@ module Embulk | |
| 44 55 |  | 
| 45 56 | 
             
                  def add(page)
         | 
| 46 57 | 
             
                    page.each do |record|
         | 
| 47 | 
            -
                       | 
| 48 | 
            -
             | 
| 49 | 
            -
             | 
| 50 | 
            -
                         | 
| 58 | 
            +
                      if @row_procs.empty?
         | 
| 59 | 
            +
                        record_hashes = [hashrize(record)]
         | 
| 60 | 
            +
                      else
         | 
| 61 | 
            +
                        record_hashes = @row_procs.flat_map do |pr|
         | 
| 62 | 
            +
                          pr.call(hashrize(record))
         | 
| 63 | 
            +
                        end
         | 
| 64 | 
            +
                      end
         | 
| 65 | 
            +
             | 
| 66 | 
            +
                      record_hashes.each do |record_hash|
         | 
| 67 | 
            +
                        @procs.each do |col, pr|
         | 
| 68 | 
            +
                          next unless record_hash.has_key?(col)
         | 
| 69 | 
            +
                          next if record_hash[col].nil? && @skip_nils[col]
         | 
| 51 70 |  | 
| 52 | 
            -
             | 
| 53 | 
            -
             | 
| 54 | 
            -
             | 
| 55 | 
            -
             | 
| 71 | 
            +
                          if pr.arity == 1
         | 
| 72 | 
            +
                            record_hash[col] = pr.call(record_hash[col])
         | 
| 73 | 
            +
                          else
         | 
| 74 | 
            +
                            record_hash[col] = pr.call(record_hash[col], record_hash)
         | 
| 75 | 
            +
                          end
         | 
| 56 76 | 
             
                        end
         | 
| 77 | 
            +
                        page_builder.add(record_hash.values)
         | 
| 57 78 | 
             
                      end
         | 
| 58 | 
            -
                      page_builder.add(record_hash.values)
         | 
| 59 79 | 
             
                    end
         | 
| 60 80 | 
             
                  end
         | 
| 61 81 |  | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: embulk-filter-ruby_proc
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0. | 
| 4 | 
            +
              version: 0.3.0
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - joker1007
         | 
| 8 8 | 
             
            autorequire:
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2016- | 
| 11 | 
            +
            date: 2016-03-24 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: embulk
         |