parse_me 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +17 -0
- data/.rvmrc +48 -0
- data/.travis.yml +4 -0
- data/Gemfile +8 -0
- data/LICENSE.txt +22 -0
- data/README.md +50 -0
- data/Rakefile +10 -0
- data/lib/parse_me/fixed_width_parser.rb +11 -0
- data/lib/parse_me/input_splitting/fixed/base.rb +51 -0
- data/lib/parse_me/input_splitting/fixed/labels.rb +53 -0
- data/lib/parse_me/input_splitting/fixed.rb +40 -0
- data/lib/parse_me/input_splitting/var.rb +0 -0
- data/lib/parse_me/input_splitting.rb +7 -0
- data/lib/parse_me/parsed_object.rb +67 -0
- data/lib/parse_me/transformations.rb +9 -0
- data/lib/parse_me/validations.rb +11 -0
- data/lib/parse_me/var_width_parser.rb +6 -0
- data/lib/parse_me/version.rb +3 -0
- data/lib/parse_me.rb +8 -0
- data/parse_me.gemspec +19 -0
- data/spec/fixed_width/input_splitting_spec.rb +389 -0
- data/spec/fixed_width/parser_spec.rb +35 -0
- data/spec/namespace_spec.rb +20 -0
- data/spec/parsed_object/parsed_object_spec.rb +165 -0
- data/spec/spec_helper.rb +4 -0
- data/spec/transformations/basic_spec.rb +124 -0
- data/spec/validation/basic_spec.rb +76 -0
- data/spec/validation/date_spec.rb +1 -0
- data/spec/validation/numeric_spec.rb +1 -0
- metadata +90 -0
data/.gitignore
ADDED
data/.rvmrc
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/usr/bin/env bash

# This is an RVM Project .rvmrc file, used to automatically load the ruby
# development environment upon cd'ing into the directory.
# It is meant to be sourced (note the `return 1` below), not executed.

# First we specify our desired <ruby>[@<gemset>], the @gemset name is optional,
# Only full ruby name is supported here, for short names use:
#     echo "rvm use 1.9.3" > .rvmrc
environment_id="ruby-1.9.3@parse_me"

# Uncomment the following lines if you want to verify rvm version per project
# rvmrc_rvm_version="1.18.18 (version)" # 1.10.1 seems as a safe start
# eval "$(echo ${rvm_version}.${rvmrc_rvm_version} | awk -F. '{print "[[ "$1*65536+$2*256+$3" -ge "$4*65536+$5*256+$6" ]]"}' )" || {
#   echo "This .rvmrc file requires at least RVM ${rvmrc_rvm_version}, aborting loading."
#   return 1
# }

# First we attempt to load the desired environment directly from the environment
# file. This is very fast and efficient compared to running through the entire
# CLI and selector. If you want feedback on which environment was used then
# insert the word 'use' after --create as this triggers verbose mode.
if [[ -d "${rvm_path:-$HOME/.rvm}/environments"
  && -s "${rvm_path:-$HOME/.rvm}/environments/$environment_id" ]]
then
  \. "${rvm_path:-$HOME/.rvm}/environments/$environment_id"
  [[ -s "${rvm_path:-$HOME/.rvm}/hooks/after_use" ]] &&
    \. "${rvm_path:-$HOME/.rvm}/hooks/after_use" || true
else
  # If the environment file has not yet been created, use the RVM CLI to select.
  rvm --create "$environment_id" || {
    echo "Failed to create RVM environment '${environment_id}'."
    return 1
  }
fi

# If you use bundler, this might be useful to you:
# Install bundler when no `bundle` command exists, or when the one found lives
# under RVM's own bin directory. The path is quoted, matched literally (-F) and
# uses the same ${rvm_path:-$HOME/.rvm} fallback as the rest of this file, so an
# unset rvm_path no longer degrades the pattern to "/bin/bundle".
if [[ -s Gemfile ]] && {
  ! builtin command -v bundle >/dev/null ||
  builtin command -v bundle | GREP_OPTIONS= \grep -F "${rvm_path:-$HOME/.rvm}/bin/bundle" >/dev/null
}
then
  printf "%b" "The rubygem 'bundler' is not installed. Installing it now.\n"
  gem install bundler
fi
if [[ -s Gemfile ]] && builtin command -v bundle >/dev/null
then
  # Hide the noisy per-gem "Using ..." lines and the final success banner.
  bundle install | GREP_OPTIONS= \grep -vE '^Using|Your bundle is complete'
fi
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Serge Morales
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
# ParseMe [![Build Status](https://travis-ci.org/sergelerator/parse_me.png?branch=master)](https://travis-ci.org/sergelerator/parse_me)
|
2
|
+
|
3
|
+
The `parse_me` gem aims to provide an easy-to-use plain text parser. It is currently in early development; expect to be able to parse fixed- and variable-width files with something similar to:
|
4
|
+
|
5
|
+
rules = {
|
6
|
+
name: { required: true, length: 50 },
|
7
|
+
last_name: { required: true, length: 50 },
|
8
|
+
age: { numeric: :integer },
|
9
|
+
email: { required: true, email: true},
|
10
|
+
last_seen: { date: "YYYY-MM-DD" }
|
11
|
+
}
|
12
|
+
|
13
|
+
options = {
|
14
|
+
row_delimiter: "\r\n", # Defaults to "\n"
|
15
|
+
field_delimiter: "|"
|
16
|
+
}
|
17
|
+
|
18
|
+
result = ParseMe::VarWidthParser.parse(source_string, rules, options)
|
19
|
+
result.each do |record|
|
20
|
+
record.valid? # => Runs the validations specified in rules, returns true if all pass
|
21
|
+
record.attributes # => Returns a Hash with the parsed attributes, using rules' keys
|
22
|
+
record.name # => Returns the name attribute
|
23
|
+
record.errors # => Returns a collection of the errors encountered during validation
|
24
|
+
end
|
25
|
+
|
26
|
+
## Installation
|
27
|
+
|
28
|
+
Add this line to your application's Gemfile:
|
29
|
+
|
30
|
+
gem 'parse_me'
|
31
|
+
|
32
|
+
And then execute:
|
33
|
+
|
34
|
+
$ bundle
|
35
|
+
|
36
|
+
Or install it yourself as:
|
37
|
+
|
38
|
+
$ gem install parse_me
|
39
|
+
|
40
|
+
## Usage
|
41
|
+
|
42
|
+
Usage will be documented on the first Alpha release.
|
43
|
+
|
44
|
+
## Contributing
|
45
|
+
|
46
|
+
1. Fork it
|
47
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
48
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
49
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
50
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
require 'parse_me/parsed_object'
|
2
|
+
require 'parse_me/input_splitting'
|
3
|
+
require 'parse_me/input_splitting/fixed'
|
4
|
+
require 'parse_me/input_splitting/var'
|
5
|
+
|
6
|
+
module ParseMe
  # Parser for fixed-width input.
  #
  # The splitting behaviour from InputSplitting::Fixed is mixed in twice: once
  # at instance level (include) and once at class level (extend), so the same
  # methods can be called on an instance or directly on the class.
  class FixedWidthParser
    include InputSplitting::Fixed
    extend InputSplitting::Fixed
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module ParseMe
  module InputSplitting
    module Fixed
      # Helpers that split fixed-width input into rows and decode every row
      # into an attribute hash.
      #
      # The host object is expected to provide @rules, @source and @options, as
      # well as the label helpers used below (labeled_rows?, valid_row_labels?,
      # label_length, length_rules_for, label_for and labeled_member).
      module Base
        # Boolean check, validates the presence of a length rule for every
        # attribute in the @rules (and, for labeled rows, that the labels are
        # valid too).
        #
        # @return [Boolean] false when @rules is missing or any attribute lacks
        #   a :length rule
        def valid_length_rules?
          !!@rules && get_length_rules.all? && valid_row_labels?
        end

        # Maps the length rules to an array. With labeled rows the rules of
        # every label are collected into one flat list.
        #
        # @return [Array] one :length value (possibly nil) per attribute
        def get_length_rules
          @length_rules ||= begin
            rule_sets = labeled_rows? ? @rules.values.flat_map(&:values) : @rules.values
            rule_sets.map { |rule| rule[:length] }
          end
        end

        # Returns the fixed width of a row: the sum of all attribute lengths
        # plus the label length. With labeled rows only the first label's rules
        # are summed.
        #
        # @raise [ArgumentError] when the length rules are invalid
        # @return [Integer]
        def file_width
          unless valid_length_rules?
            raise ArgumentError, "File width can't be properly calculated if length rules are invalid"
          end

          @file_width ||= begin
            row_rules = labeled_rows? ? @rules.first[1].values : @rules.values
            row_rules.map { |rule| rule[:length] }.reduce(0, :+) + label_length
          end
        end

        # Creates the template string used to unpack a record, e.g. "A2A3".
        # With labeled rows the template starts with a field for the label.
        #
        # @param label [Object, nil] label whose rules should be used
        # @return [String] a String#unpack template
        def unpack_pattern(label = nil)
          prefix = labeled_rows? ? "A#{label_length}" : ''
          prefix + length_rules_for(label).map { |length| "A#{length}" }.join
        end

        # Splits the input into rows of file_width characters.
        #
        # Every occurrence of the configured :row_delimiter is removed first;
        # it falls back to "\n" when the option is absent, which matches the
        # previous hard-coded behaviour.
        #
        # @return [Array<String>] the rows, without delimiters
        def split_input
          @split_input ||= begin
            delimiter = (@options && @options[:row_delimiter]) || "\n"
            @source.gsub(delimiter, '').scan(/.{#{file_width}}/)
          end
        end

        # Gets the list of attributes specified in @rules. The optional label
        # argument should be supplied when the labeled_rows option is set, in
        # which case the attributes of that label's rules are returned.
        #
        # @param label [Object, nil]
        # @return [Array] attribute names
        def attributes(label = nil)
          (labeled_rows? ? @rules[label] : @rules).keys
        end

        # Decodes the supplied string and returns the attributes hash.
        #
        # @param str [String] one unlabeled row
        # @return [Hash] attribute name => extracted value
        def member(str)
          unpacked = str.unpack(unpack_pattern)
          Hash[attributes.zip(unpacked)]
        end

        # Maps every row of the input to its attributes hash.
        #
        # @return [Array<Hash>]
        def collection
          split_input.map do |str|
            labeled_rows? ? labeled_member(str, label_for(str)) : member(str)
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module ParseMe
  module InputSplitting
    module Fixed
      # Helpers for "labeled rows": input where every row starts with a
      # fixed-length label and @rules is keyed by label, each label mapping to
      # its own attribute rules.
      #
      # The host object is expected to provide @rules and @options, plus
      # unpack_pattern and attributes (used by labeled_member).
      module Labels
        # Boolean check for the parse option `:labeled_rows`.
        #
        # Reads the option directly instead of memoizing it with `||=`: a false
        # value can never be cached that way, and a cached true value would
        # outlive a later change of @options.
        def labeled_rows?
          @options[:labeled_rows]
        end

        # Boolean validation check, returns a truth value if all of the labels
        # specified in the @rules are of the same length. Always true when the
        # rows are not labeled.
        def valid_row_labels?
          return true unless labeled_rows?

          label_lengths.uniq.length <= 1
        end

        # Returns an array with the length rules in a given @rules configuration hash.
        # If the label does not exist (or if the `labeled_rows?` switch is turned off)
        # this method falls back to fetching the rules of the root @rules hash.
        #
        # NOTE(review): the lookup uses the label exactly as given, so it only
        # finds rules whose key equals that object -- confirm callers pass
        # labels of the same type as the @rules keys.
        def length_rules_for(label)
          (@rules[label] || @rules).values.map { |rule| rule[:length] }
        end

        # Gets an array with the labels specified in @rules, as strings ordered
        # by length.
        def labels
          @labels ||= @rules.keys.map(&:to_s).sort_by(&:length)
        end

        # Returns an array with the lengths of the labels specified in @rules.
        def label_lengths
          @label_lengths ||= labels.map(&:length)
        end

        # Returns the length of the labels used in the file, or 0 when the rows
        # are not labeled.
        def label_length
          @label_length ||= labeled_rows? ? label_lengths.first : 0
        end

        # Returns the label for a given string, i.e. its first label_length
        # characters.
        def label_for(str)
          str[0...label_length]
        end

        # Returns a Hash with the data from *str* formatted as an attribute list,
        # with the keys being attribute names, and the values being the attribute
        # values for that key. The label itself is stored under :__label.
        def labeled_member(str, label)
          values = str.unpack(unpack_pattern(label))
          values.shift # the first unpacked field is the label, not an attribute
          Hash[attributes(label).zip(values)].merge(__label: label)
        end
      end
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'parse_me/input_splitting/fixed/labels'
|
2
|
+
require 'parse_me/input_splitting/fixed/base'
|
3
|
+
|
4
|
+
module ParseMe
  module InputSplitting
    # Entry point for fixed-width parsing. Including or extending this module
    # also mixes in the Base and Labels helper modules.
    module Fixed
      # Instance variables the Base and Labels helpers use as `||=` caches.
      # They are cleared at the start of every #parse call.
      MEMOIZED_IVARS = [
        :@length_rules, :@file_width, :@split_input,
        :@labeled_rows, :@labels, :@label_lengths, :@label_length
      ].freeze

      # Hook: mixes the helper modules into classes that include Fixed.
      def self.included(base)
        base.instance_eval do
          include Base
          include Labels
        end
      end

      # Hook: mixes the helper modules into objects that extend Fixed.
      def self.extended(base)
        base.instance_eval do
          extend Base
          extend Labels
        end
      end

      # Parses fixed-width +source+ into a list of ParsedObject records.
      #
      # @param source [String] the raw input
      # @param rules [Hash] attribute rules; every attribute needs a :length
      # @param options [Hash] :labeled_rows (default false) and
      #   :row_delimiter (default "\n"); the hash passed in is not modified
      # @raise [ArgumentError] when an attribute has no length rule
      # @return [Array<ParsedObject>]
      def parse(source, rules, options = {})
        # Work on a copy so the defaults are not written into the caller's hash.
        @options = options.dup
        @options[:labeled_rows] ||= false
        @options[:row_delimiter] ||= "\n"

        @rules = rules
        @source = source

        # When Fixed is mixed in with `extend`, the receiver is a long-lived
        # object (e.g. a class), so values cached during a previous parse would
        # otherwise be reused for a different source or rule set.
        MEMOIZED_IVARS.each { |ivar| instance_variable_set(ivar, nil) }

        unless valid_length_rules?
          raise ArgumentError, 'Fixed size file input requires a length rule for every attribute'
        end

        collection.map do |record|
          ParsedObject.new(record, @rules)
        end
      end
    end
  end
end
|
File without changes
|
@@ -0,0 +1,67 @@
|
|
1
|
+
require 'parse_me/transformations'
|
2
|
+
require 'parse_me/validations'
|
3
|
+
|
4
|
+
module ParseMe
  # A single parsed record: holds the attribute values, runs the validations
  # and transformations named in the rules, and collects validation errors.
  #
  # Rule keys that match a public instance method of ParseMe::Validations are
  # treated as validations; keys that match a public instance method of
  # ParseMe::Transformations are treated as transformations. Both are invoked
  # as `name(attribute_value, rule_value)`.
  class ParsedObject
    include ParseMe::Validations
    include ParseMe::Transformations

    attr_reader :attributes, :record_label, :rules, :errors

    # @param attributes [Hash] attribute name => raw value; may carry the
    #   record label under :__label
    # @param rules [Hash] attribute rules, or label => attribute rules when
    #   the record is labeled
    def initialize(attributes, rules = {})
      # Copy first so removing :__label does not alter the caller's hash.
      @attributes = attributes.dup
      @record_label = @attributes.delete(:__label) if @attributes[:__label]
      @rules = record_label ? rules[record_label] : rules
      @errors = []

      # Validations run against the raw values, before any transformation.
      apply_validations
      apply_transformations
    end

    # Exposes every attribute as a reader method (e.g. `record.name`).
    # Key presence is checked rather than value truthiness, so attributes
    # holding nil or false are returned instead of raising NoMethodError.
    def method_missing(name, *args, &block)
      @attributes.key?(name) ? @attributes[name] : super
    end

    # Keeps respond_to? and method lookups consistent with method_missing.
    def respond_to_missing?(name, include_private = false)
      @attributes.key?(name) || super
    end

    # Names of the available transformations.
    def transformations
      @transformations ||= ParseMe::Transformations.public_instance_methods
    end

    # Names of the available validations.
    def validation_rules
      @validation_rules ||= ParseMe::Validations.public_instance_methods
    end

    # Runs every transformation named in the rules of each attribute.
    def apply_transformations
      rules.each do |attr, set|
        (set.keys & transformations).each do |transformation|
          transform(attr, transformation, set[transformation])
        end
      end
    end

    # Replaces the value of +attr+ with the result of the transformation.
    def transform(attr, transformation, val)
      @attributes[attr] = send(transformation, @attributes[attr], val)
    end

    # Runs every validation named in the rules of each attribute.
    def apply_validations
      rules.each do |attr, set|
        (set.keys & validation_rules).each do |validation|
          validate(attr, validation, set[validation])
        end
      end
    end

    # Records an error when the validation returns a falsy value.
    def validate(attr, validation, val)
      return if send(validation, @attributes[attr], val)

      add_error("#{validation} validation failed for '#{attr}' => '#{val}'")
    end

    # @return [Boolean] true when no validation error was recorded
    def valid?
      errors.empty?
    end

    # Appends +msg+ to the error list.
    def add_error(msg)
      errors << msg
    end
  end
end
|
data/lib/parse_me.rb
ADDED
data/parse_me.gemspec
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'parse_me/version'
|
5
|
+
|
6
|
+
# Gem metadata for parse_me. The file list comes from git, so the gem can only
# be built from inside a git checkout.
Gem::Specification.new do |gem|
  gem.name          = "parse_me"
  gem.version       = ParseMe::VERSION
  gem.authors       = ["Serge Morales"]
  gem.email         = ["i.serge23@gmail.com"]
  gem.description   = "Plain text file parser"
  gem.summary       = "Parse your fixed or variable width plain text files using your own custom layouts, easily validate and retrieve your data."
  gem.homepage      = "https://github.com/sergelerator/parse_me"
  # Matches the MIT License shipped in LICENSE.txt.
  gem.license       = "MIT"

  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]
end
|