dif 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 03c5b2d2fad359b79dda2614b3611f607306d5be
4
+ data.tar.gz: e20d6cd0eecdd5019f68bf80326cb48e17481540
5
+ SHA512:
6
+ metadata.gz: a51b0386f859202a8746c94e455c65ee2fae3666addd7710bb91a7c7f93e2968c0b653b98d2fefc78cc5dc227acbcc14da92011ad73ad2e64c5473adb6fc28ff
7
+ data.tar.gz: 48c1b5a1d21829509ff674f12c026a112f5411962e125ce6a2e6818656f1ac73fdd6772afa33f955e00cc127ecbb366e01c5165ed486f69fb20b94b356977415
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in dif.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Carlos Troncoso
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,33 @@
1
+ # Dif
2
+
3
+ Dif provides a simple method to READ a dif (Data Interchange File Format),
4
+ and convert to a standard CSV file.
5
+ A WRITE module will follow in further versions, if there is any interest,
6
+ but as of now, DIF being a legacy file format, the only use I've found is
7
+ to read from them.
8
+
9
+ ## Installation
10
+
11
+ Add this line to your application's Gemfile:
12
+
13
+ gem 'dif'
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install dif
22
+
23
+ ## Usage
24
+
25
+ TODO: Write usage instructions here
26
+
27
+ ## Contributing
28
+
29
+ 1. Fork it
30
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
31
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
32
+ 4. Push to the branch (`git push origin my-new-feature`)
33
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/dif.gemspec ADDED
@@ -0,0 +1,28 @@
1
# coding: utf-8
# Gem specification for the `dif` gem.
# Puts the gem's own lib/ directory on the load path so Dif::VERSION can be
# read without the gem being installed.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'dif/version'

Gem::Specification.new do |spec|
  spec.name          = "dif"
  spec.version       = Dif::VERSION
  spec.authors       = ["Carlos Troncoso"]
  spec.email         = ["ctroncoso@thinkmint.cl"]
  # Shown on the gem's public page; wording kept in sync with README.md.
  spec.description   = %q{
  Dif provides a simple method to READ a dif (Data Interchange File Format),
  and convert to a standard CSV file.
  A WRITE module will follow in further versions, if there is any interest,
  but as of now, DIF being a legacy file format, the only use I've found is
  to read from them.}
  spec.summary       = %q{Provides a simple method to READ .dif files}
  spec.homepage      = ""
  spec.license       = "MIT"

  # Package exactly the files tracked by git; executables and test files are
  # derived from that list by path prefix.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
end
data/lib/dif.rb ADDED
@@ -0,0 +1,103 @@
1
+ # File Header has the following structure
2
+ # TABLE
3
+ # 0,1
4
+ # ""
5
+
6
+ # The number of rows comes after "VECTORS"
7
+ # VECTORS
8
+ # 0,21 <--- Second value
9
+ # ""
10
+
11
+ # The number of rows comes after "TUPLES"
12
+ # TUPLES
13
+ # 0,26032 <--- Second value
14
+ # ""
15
+
16
+ # Data marker. This is a 4 line structure that signals the beginning of the DATA section
17
+ # DATA
18
+ # 0,0
19
+ # ""
20
+
21
+ # BOT Beginning of Tuple. Actual data starts after this two-line structure.
22
+ # -1,0
23
+ # BOT
24
+
25
+ require "dif/version"
26
+ require "dif/line_helpers"
27
+ require "csv"
28
+
29
+ module Dif
30
+ class Reader
31
+ attr_reader :rows_count, :column_count, :lines, :csv
32
+ ALLOWED_COMMANDS = %w(EOD BOT)
33
+
34
+ def initialize(file, encoding="IBM850")
35
+ @lines = ::File.read(file, :external_encoding => encoding, :internal_encoding => "UTF-8").lines
36
+ @csv = CSV.new("", col_sep: "\t")
37
+ @line_buffer=Array.new
38
+ fix_lines
39
+ set_rows
40
+ set_columns
41
+ @lines.slice! 0..data_section_start_at_line
42
+ read_data
43
+ end
44
+
45
+ def export_csv
46
+ @csv.string
47
+ end
48
+
49
+ private
50
+
51
+ def set_rows
52
+ # find the line with VECTORS, move to the next, split, get last item, convert to integer and save in instance variable.
53
+ vector_line =lines.index("VECTORS")
54
+ @rows_count = lines[vector_line.next].split(",").last.to_i
55
+ end
56
+
57
+ def set_columns
58
+ # find the line with TUPLES, move to the next, split, get last item, convert to integer and save in instance variable.
59
+ tuple_line = lines.index("TUPLES")
60
+ @column_count = lines[tuple_line.next].split(",").last.to_i
61
+ end
62
+
63
+ def data_section_start_at_line
64
+ @data_section_start_at_line ||= lines.index("DATA")
65
+ end
66
+
67
+ def read_data
68
+ @lines.slice! 0..lines.index("BOT") # remove everything up to first BOT
69
+ lines.each_with_index do |line,index| #iterate over data section
70
+ # only looking for lines in the form of -1,15 (two digits with a comma between)
71
+ line_eval = /(?<command>-*\d),(?<value>\d+)/.match(line)
72
+ next if not line_eval
73
+
74
+ case line_eval[:command]
75
+ when "-1"
76
+ process_command(index)
77
+ when "0" #value
78
+ @line_buffer << line_eval[:value].to_i
79
+ when "1"
80
+ @line_buffer << lines[index.next].sub(/^"/,"").sub(/"$/,"")
81
+ end
82
+
83
+ end
84
+ end
85
+
86
+
87
+ def process_command(index)
88
+ command = lines[index.next]
89
+ raise "Command not in allowed list" if !ALLOWED_COMMANDS.include? command
90
+
91
+ csv << @line_buffer if !@line_buffer.empty?
92
+
93
+ case command
94
+ when "BOT"
95
+ @line_buffer.clear
96
+ when "EOD"
97
+ return
98
+ end
99
+ end
100
+
101
+
102
+ end
103
+ end
@@ -0,0 +1,14 @@
1
+ # These methods are a set of private methods that extends capabilities
2
+
3
module Dif
  class Reader
    private

    # Strips the trailing line terminator from every entry of @lines, in place.
    #
    # Uses the non-destructive `chomp` on purpose: `chomp!` returns nil when
    # there is nothing to strip, so mapping with it would replace such a line
    # (typically the last line of a file without a final newline) with nil.
    def fix_lines
      @lines.map!(&:chomp)
    end
  end
end
@@ -0,0 +1,3 @@
1
module Dif
  # Version of the gem, read by dif.gemspec. Frozen so it cannot be mutated
  # at runtime.
  VERSION = "0.0.1".freeze
end
metadata ADDED
@@ -0,0 +1,84 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: dif
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Carlos Troncoso
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-11-13 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: "\n Dif provides a simple method to READ a dif (Data Interchange File
42
+ Format),\n and convert to a standard CSV file.\n A WRITE module will follow
43
+ in further versions, if there is any interest, \n but as of now, being diff a
44
+ legacy file format, the only use I've foud is \n to read from them."
45
+ email:
46
+ - ctroncoso@thinkmint.cl
47
+ executables: []
48
+ extensions: []
49
+ extra_rdoc_files: []
50
+ files:
51
+ - .gitignore
52
+ - Gemfile
53
+ - LICENSE.txt
54
+ - README.md
55
+ - Rakefile
56
+ - dif.gemspec
57
+ - lib/dif.rb
58
+ - lib/dif/line_helpers.rb
59
+ - lib/dif/version.rb
60
+ homepage: ''
61
+ licenses:
62
+ - MIT
63
+ metadata: {}
64
+ post_install_message:
65
+ rdoc_options: []
66
+ require_paths:
67
+ - lib
68
+ required_ruby_version: !ruby/object:Gem::Requirement
69
+ requirements:
70
+ - - '>='
71
+ - !ruby/object:Gem::Version
72
+ version: '0'
73
+ required_rubygems_version: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ requirements: []
79
+ rubyforge_project:
80
+ rubygems_version: 2.1.4
81
+ signing_key:
82
+ specification_version: 4
83
+ summary: Provides a simple method to READ .dif files
84
+ test_files: []