parse_me 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.rvmrc ADDED
@@ -0,0 +1,48 @@
1
+ #!/usr/bin/env bash
2
+
3
+ # This is an RVM Project .rvmrc file, used to automatically load the ruby
4
+ # development environment upon cd'ing into the directory
5
+
6
+ # First we specify our desired <ruby>[@<gemset>], the @gemset name is optional,
7
+ # Only full ruby name is supported here, for short names use:
8
+ # echo "rvm use 1.9.3" > .rvmrc
9
+ environment_id="ruby-1.9.3@parse_me"
10
+
11
+ # Uncomment the following lines if you want to verify rvm version per project
12
+ # rvmrc_rvm_version="1.18.18 (version)" # 1.10.1 seems as a safe start
13
+ # eval "$(echo ${rvm_version}.${rvmrc_rvm_version} | awk -F. '{print "[[ "$1*65536+$2*256+$3" -ge "$4*65536+$5*256+$6" ]]"}' )" || {
14
+ # echo "This .rvmrc file requires at least RVM ${rvmrc_rvm_version}, aborting loading."
15
+ # return 1
16
+ # }
17
+
18
+ # First we attempt to load the desired environment directly from the environment
19
+ # file. This is very fast and efficient compared to running through the entire
20
+ # CLI and selector. If you want feedback on which environment was used then
21
+ # insert the word 'use' after --create as this triggers verbose mode.
22
+ if [[ -d "${rvm_path:-$HOME/.rvm}/environments"
23
+ && -s "${rvm_path:-$HOME/.rvm}/environments/$environment_id" ]]
24
+ then
25
+ \. "${rvm_path:-$HOME/.rvm}/environments/$environment_id"
26
+ [[ -s "${rvm_path:-$HOME/.rvm}/hooks/after_use" ]] &&
27
+ \. "${rvm_path:-$HOME/.rvm}/hooks/after_use" || true
28
+ else
29
+ # If the environment file has not yet been created, use the RVM CLI to select.
30
+ rvm --create "$environment_id" || {
31
+ echo "Failed to create RVM environment '${environment_id}'."
32
+ return 1
33
+ }
34
+ fi
35
+
36
+ # If you use bundler, this might be useful to you:
37
+ if [[ -s Gemfile ]] && {
38
+ ! builtin command -v bundle >/dev/null ||
39
+ builtin command -v bundle | GREP_OPTIONS= \grep $rvm_path/bin/bundle >/dev/null
40
+ }
41
+ then
42
+ printf "%b" "The rubygem 'bundler' is not installed. Installing it now.\n"
43
+ gem install bundler
44
+ fi
45
+ if [[ -s Gemfile ]] && builtin command -v bundle >/dev/null
46
+ then
47
+ bundle install | GREP_OPTIONS= \grep -vE '^Using|Your bundle is complete'
48
+ fi
data/.travis.yml ADDED
@@ -0,0 +1,4 @@
1
+ language: ruby
2
+ rvm:
3
+ - "1.9.2"
4
+ - "1.9.3"
data/Gemfile ADDED
@@ -0,0 +1,8 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in parse_me.gemspec
4
+ gemspec
5
+
6
+ gem "rspec"
7
+ gem "simplecov"
8
+ gem "rake"
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Serge Morales
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,50 @@
1
+ # ParseMe [![Build Status](https://travis-ci.org/sergelerator/parse_me.png?branch=master)](https://travis-ci.org/sergelerator/parse_me)
2
+
3
+ The `parse_me` gem aims to provide an easy-to-use plain text parser. It is currently in early development; expect to be able to parse fixed and variable width files with something similar to:
4
+
5
+ rules = {
6
+ name: { required: true, length: 50 },
7
+ last_name: { required: true, length: 50 },
8
+ age: { numeric: :integer },
9
+ email: { required: true, email: true},
10
+ last_seen: { date: "YYYY-MM-DD" }
11
+ }
12
+
13
+ options = {
14
+ row_delimiter: "\r\n", # Defaults to "\n"
15
+ field_delimiter: "|"
16
+ }
17
+
18
+ result = ParseMe::VarWidthParser.parse(source_string, rules, options)
19
+ result.each do |record|
20
+ record.valid? # => Runs the validations specified in rules, returns true if all pass
21
+ record.attributes # => Returns a Hash with the parsed attributes, using rules' keys
22
+ record.name # => Returns the name attribute
23
+ record.errors # => Returns a collection of the errors encountered during validation
24
+ end
25
+
26
+ ## Installation
27
+
28
+ Add this line to your application's Gemfile:
29
+
30
+ gem 'parse_me'
31
+
32
+ And then execute:
33
+
34
+ $ bundle
35
+
36
+ Or install it yourself as:
37
+
38
+ $ gem install parse_me
39
+
40
+ ## Usage
41
+
42
+ Usage will be documented on the first Alpha release.
43
+
44
+ ## Contributing
45
+
46
+ 1. Fork it
47
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
48
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
49
+ 4. Push to the branch (`git push origin my-new-feature`)
50
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
+ require "bundler/gem_tasks"
2
+
namespace :spec do
  desc "Runs all the specs"
  task :all do
    # `sh` (unlike a bare `system` call) fails the task when the command exits
    # with a non-zero status, so a failing suite makes `rake` fail too.
    sh "bundle exec rspec"
  end
end

# Running plain `rake` runs the whole spec suite.
task default: 'spec:all'
@@ -0,0 +1,11 @@
1
+ require 'parse_me/parsed_object'
2
+ require 'parse_me/input_splitting'
3
+ require 'parse_me/input_splitting/fixed'
4
+ require 'parse_me/input_splitting/var'
5
+
module ParseMe
  # Parser for fixed-width plain text input.
  #
  # InputSplitting::Fixed is mixed in twice on purpose: `extend` exposes its
  # methods on the class itself, while `include` exposes the same methods on
  # instances of the class.
  class FixedWidthParser
    extend InputSplitting::Fixed
    include InputSplitting::Fixed
  end
end
@@ -0,0 +1,51 @@
module ParseMe
  module InputSplitting
    module Fixed
      # Record splitting and decoding helpers for fixed-width input.
      #
      # The methods read the host's @rules, @source and @options instance
      # variables and call labeled_rows?, valid_row_labels?, label_length,
      # label_for, length_rules_for and labeled_member, which the host must
      # also provide.
      module Base
        # Boolean check, validates the presence of a length rule for every
        # attribute in @rules and, through valid_row_labels?, the row labels.
        def valid_length_rules?
          @rules && get_length_rules.all? && valid_row_labels?
        end

        # Maps the length rules to an array (memoized in @length_rules).
        # With labeled rows, the rules of every label are collected.
        def get_length_rules
          @length_rules ||= begin
            rule_sets = labeled_rows? ? @rules.values.map(&:values).flatten : @rules.values
            rule_sets.map { |rule| rule[:length] }
          end
        end

        # Returns the fixed width of one record, label included (memoized in
        # @file_width). With labeled rows the width is computed from the rules
        # of the first label only.
        #
        # Raises ArgumentError when the length rules are invalid.
        def file_width
          unless valid_length_rules?
            raise ArgumentError, "File width can't be properly calculated if length rules are invalid"
          end

          @file_width ||= begin
            rule_sets = labeled_rows? ? @rules.first[1].values : @rules.values
            rule_sets.map { |rule| rule[:length] }.reduce(0, :+) + label_length
          end
        end

        # Creates the template string used to unpack a record: one "A<n>"
        # directive per field, preceded by one for the label on labeled rows.
        def unpack_pattern(label = nil)
          (labeled_rows? ? "A#{label_length}" : '') + length_rules_for(label).map { |l| "A#{l}" }.join
        end

        # Splits the input into records of file_width characters (memoized in
        # @split_input). Trailing characters that do not fill a whole record
        # are not returned.
        def split_input
          @split_input ||= begin
            delimiter = (@options && @options[:row_delimiter]) || "\n"
            # Remove the configured delimiter first (e.g. "\r\n"), then any
            # stray newline: `.` in the scan pattern does not match "\n", and a
            # leftover "\r" would shift every following record by one character.
            @source.gsub(delimiter, '').delete("\n").scan(/.{#{file_width}}/)
          end
        end

        # Gets the list of attributes specified in @rules. With labeled rows
        # the label argument selects the rule set to read the names from.
        def attributes(label = nil)
          (labeled_rows? ? @rules[label] : @rules).keys
        end

        # Decodes the supplied string and returns the attributes hash.
        def member(str)
          unpacked = str.unpack(unpack_pattern)
          Hash[attributes.zip(unpacked)]
        end

        # Maps every record of the input to its attributes hash.
        def collection
          split_input.map do |str|
            labeled_rows? ? labeled_member(str, label_for(str)) : member(str)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,53 @@
module ParseMe
  module InputSplitting
    module Fixed
      # Helpers for "labeled rows": input in which each record starts with a
      # label that selects the rule set (a key of @rules) used to decode it.
      # Reads @rules and @options from the host; also calls the host's
      # unpack_pattern and attributes methods.
      module Labels
        # Value of the `:labeled_rows` parse option (memoized once truthy).
        def labeled_rows?
          @labeled_rows ||= @options[:labeled_rows]
        end

        # True when rows are not labeled, or when every label in @rules has
        # the same length. `labels` is sorted by length, so comparing the
        # shortest with the longest is enough.
        def valid_row_labels?
          !labeled_rows? || label_lengths.first == label_lengths.last
        end

        # Length rules of the rule set stored under +label+. When @rules has
        # no entry for the label (for instance because rows are not labeled)
        # the root @rules hash is used as the rule set instead.
        def length_rules_for(label)
          rule_set = @rules[label] || @rules
          rule_set.values.map { |rule| rule[:length] }
        end

        # The keys of @rules as strings, shortest first (memoized).
        def labels
          @labels ||= @rules.keys.map(&:to_s).sort_by { |label| label.length }
        end

        # Lengths of the labels, in the same order as `labels` (memoized).
        def label_lengths
          @label_lengths ||= labels.map { |label| label.length.to_i }
        end

        # Number of leading characters of a record taken up by its label;
        # 0 when rows are not labeled (memoized).
        def label_length
          @label_length ||= if labeled_rows?
                              label_lengths.first
                            else
                              0
                            end
        end

        # Extracts the label from the beginning of +str+.
        def label_for(str)
          str[0...label_length]
        end

        # Decodes +str+ with the rule set of +label+ and returns a Hash of
        # attribute name => value, plus the label itself under :__label.
        def labeled_member(str, label)
          # The first unpacked field is the label text; only the rest are values.
          _label_text, *values = str.unpack(unpack_pattern(label))
          Hash[attributes(label).zip(values)].merge(__label: label)
        end
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
+ require 'parse_me/input_splitting/fixed/labels'
2
+ require 'parse_me/input_splitting/fixed/base'
3
+
module ParseMe
  module InputSplitting
    # Entry point for fixed-width parsing. Mixing this module in, with either
    # `include` or `extend`, also mixes in Base and Labels the same way.
    module Fixed
      # Instance variables in which Base and Labels memoize values derived
      # from @source, @rules and @options. They are cleared at the start of
      # every #parse so a reused receiver never serves results computed for a
      # previous input.
      MEMOIZED_STATE = [
        :@length_rules, :@file_width, :@split_input,
        :@labeled_rows, :@labels, :@label_lengths, :@label_length
      ].freeze

      # Hook: `include Fixed` also includes Base and Labels.
      def self.included(base)
        base.instance_eval do
          include Base
          include Labels
        end
      end

      # Hook: `extend Fixed` also extends the receiver with Base and Labels.
      def self.extended(base)
        base.instance_eval do
          extend Base
          extend Labels
        end
      end

      # Parses fixed-width +source+ according to +rules+.
      #
      # source  - String holding the raw input.
      # rules   - Hash of attribute name => rule set; every rule set needs a
      #           :length entry. With labeled rows: label => such a Hash.
      # options - Hash; :labeled_rows (default false) and :row_delimiter
      #           (default "\n"). The Hash passed in is not modified.
      #
      # Returns an Array with one ParsedObject per record.
      # Raises ArgumentError when an attribute lacks a length rule.
      #
      # NOTE(review): state is kept in instance variables of the receiver,
      # which is the class itself when the module is extended, so concurrent
      # calls on one receiver would presumably interfere - confirm before
      # using from several threads.
      def parse(source, rules, options = {})
        MEMOIZED_STATE.each { |ivar| instance_variable_set(ivar, nil) }

        # Work on a copy so the defaults are not written into the caller's hash.
        @options = options.dup
        @options[:labeled_rows] ||= false
        @options[:row_delimiter] ||= "\n"

        @rules = rules
        @source = source

        unless valid_length_rules?
          raise ArgumentError, 'Fixed size file input requires a length rule for every attribute'
        end

        collection.map do |record|
          ParsedObject.new(record, @rules)
        end
      end
    end
  end
end
File without changes
@@ -0,0 +1,7 @@
1
+ require 'parse_me/input_splitting/fixed'
2
+ require 'parse_me/input_splitting/var'
3
+
module ParseMe
  # Namespace for the input splitting strategies. It is deliberately empty
  # here: this file only requires parse_me/input_splitting/fixed and
  # parse_me/input_splitting/var.
  module InputSplitting
  end
end
@@ -0,0 +1,67 @@
1
+ require 'parse_me/transformations'
2
+ require 'parse_me/validations'
3
+
module ParseMe
  # One parsed record: the attribute values plus the outcome of running the
  # validations and transformations named in the record's rules.
  #
  # A rule key is treated as a validation when it matches a public method of
  # ParseMe::Validations, and as a transformation when it matches a public
  # method of ParseMe::Transformations; other keys are ignored.
  class ParsedObject
    include ParseMe::Validations
    include ParseMe::Transformations

    attr_reader :attributes, :record_label, :rules, :errors

    # attributes - Hash of attribute name => raw value. A :__label entry, if
    #              present, is removed from the Hash and kept as record_label.
    # rules      - Hash of attribute name => rule set; when the record has a
    #              label, a Hash of label => such a Hash.
    #
    # Validations run first, on the raw values; transformations run after and
    # replace the stored values.
    def initialize(attributes, rules = {})
      @attributes = attributes
      @record_label = @attributes.delete(:__label) if @attributes[:__label]
      @rules = record_label ? rules[record_label] : rules
      @errors = []

      apply_validations
      apply_transformations
    end

    # Exposes every attribute as a reader method (record.name).
    # The key is checked explicitly so that attributes whose value is nil or
    # false are still returned instead of raising NoMethodError.
    def method_missing(name, *args, &b)
      return @attributes[name] if @attributes.key?(name)

      super
    end

    # Keeps respond_to? and method() consistent with method_missing.
    def respond_to_missing?(name, include_private = false)
      @attributes.key?(name) || super
    end

    # Names of the available transformations (memoized).
    def transformations
      @transformations ||= ParseMe::Transformations.public_instance_methods
    end

    # Names of the available validations (memoized).
    def validation_rules
      @validation_rules ||= ParseMe::Validations.public_instance_methods
    end

    # Runs every transformation named in the rules of each attribute.
    def apply_transformations
      rules.each do |attr, set|
        (set.keys & transformations).each do |transformation|
          transform(attr, transformation, set[transformation])
        end
      end
    end

    # Replaces the value of +attr+ with the result of +transformation+,
    # called with the current value and the rule value +val+.
    def transform(attr, transformation, val)
      @attributes[attr] = method(transformation).call(@attributes[attr], val)
    end

    # Runs every validation named in the rules of each attribute.
    def apply_validations
      rules.each do |attr, set|
        (set.keys & validation_rules).each do |validation|
          validate(attr, validation, set[validation])
        end
      end
    end

    # Records an error when +validation+, called with the value of +attr+
    # and the rule value +val+, returns a falsy result.
    def validate(attr, validation, val)
      unless method(validation).call(@attributes[attr], val)
        add_error("#{validation} validation failed for '#{attr}' => '#{val}'")
      end
    end

    # True when no validation has failed.
    def valid?
      errors.empty?
    end

    # Appends +msg+ to the list of errors.
    def add_error(msg)
      errors << msg
    end
  end
end
@@ -0,0 +1,9 @@
module ParseMe
  # Value transformations that can be named in a rule set. Each one receives
  # the attribute's current value and the value given for the rule, and
  # returns the new attribute value.
  module Transformations
    # Restores an implied ("ghost") decimal point: the input is divided by
    # 10 to the power of +rule_value+, so "12345" with a scale of 2 becomes
    # 123.45.
    #
    # input      - value converted with to_f.
    # rule_value - number of implied decimal places; anything Kernel#Integer
    #              accepts (e.g. 2 or "2").
    #
    # Returns a Float.
    # Raises ArgumentError when rule_value cannot be converted to an integer.
    def ghost_decimal_scale(input, rule_value)
      scale = begin
        # Integer() rejects nil and non-numeric text, unlike to_i, which
        # silently turns them into 0.
        Integer(rule_value)
      rescue ArgumentError, TypeError
        raise ArgumentError, "ghost_decimal_scale should be able to be converted to int"
      end

      input.to_f / (10**scale)
    end
  end
end
@@ -0,0 +1,11 @@
module ParseMe
  # Validations that can be named in a rule set. Each one receives the
  # attribute value and the value given for the rule, and returns a boolean.
  module Validations
    # Passes when the text form of +input+ has at most +rule_value+ characters.
    def length(input, rule_value)
      actual = input.to_s.length
      limit = rule_value.to_i
      actual <= limit
    end

    # Passes when the rule is switched off (falsy +rule_value+), or when
    # +input+ contains something other than whitespace.
    def required(input, rule_value)
      return true unless rule_value

      !input.to_s.strip.empty?
    end
  end
end
@@ -0,0 +1,6 @@
1
+ require 'parse_me/parsed_object'
2
+
module ParseMe
  # Parser for variable-width (delimited) input.
  # No behaviour is defined on the class in this file yet.
  class VarWidthParser
  end
end
@@ -0,0 +1,3 @@
module ParseMe
  # Version string of the gem.
  VERSION = "0.0.2"
end
data/lib/parse_me.rb ADDED
@@ -0,0 +1,8 @@
1
+ require "parse_me/version"
2
+ require "parse_me/fixed_width_parser"
3
+ require "parse_me/var_width_parser"
4
+ require "parse_me/parsed_object"
5
+ require "parse_me/transformations"
6
+
# Top-level namespace of the gem. Nothing is defined on it in this file;
# the file only loads the gem's components.
module ParseMe
end
data/parse_me.gemspec ADDED
@@ -0,0 +1,19 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'parse_me/version'
5
+
Gem::Specification.new do |gem|
  gem.name          = "parse_me"
  gem.version       = ParseMe::VERSION
  gem.authors       = ["Serge Morales"]
  gem.email         = ["i.serge23@gmail.com"]
  gem.description   = "Plain text file parser"
  gem.summary       = "Parse your fixed or variable width plain text files using your own custom layouts, easily validate and retrieve your data."
  gem.homepage      = "https://github.com/sergelerator/parse_me"
  # Matches the MIT license text shipped in LICENSE.txt.
  gem.license       = "MIT"

  # Package exactly the files tracked by git.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]
end