data_parser 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/LICENSE +20 -0
- data/README.md +59 -0
- data/Rakefile +7 -0
- data/bin/data_parser_runner.rb +30 -0
- data/data_parser.gemspec +19 -0
- data/lib/data_parser/json.rb +58 -0
- data/lib/data_parser/version.rb +3 -0
- data/lib/data_parser.rb +25 -0
- data/spec/data/clients.json +72 -0
- data/spec/lib/data_parser/json_spec.rb +70 -0
- data/spec/lib/data_parser_spec.rb +46 -0
- data/spec/spec_helper.rb +33 -0
- metadata +58 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: da6f8c4bcc0af5d44982a6d6296ad8fa65be1f04b1d6e0a9460b98bb433d8fbe
|
4
|
+
data.tar.gz: 9b99db6d45fe9b88ae7b1e9bd3a98df4f02b3a9ffdbc2292eb667f998359a74d
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 56fd04bc708e7e2cbb3537b31b951a5ced133a4489b9b7ac95a242722ec144313ba94f8508ffb4e29dda4d27014f9026e2b2d9eea1c21712d92455d807d20f3a
|
7
|
+
data.tar.gz: f3733eb3bc8a50c87e1f76c553d7119006c7a5e480f25c6492703be00e7f4f5c7a39afbdc16a5a135d218092971f55bd04194a815d86067bc38fbddf2c832d0e
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2023 Rolando Alvarado
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
# DataParser
|
2
|
+
|
3
|
+
DataParser is a simple library for parsing a JSON file. It can search records by partial or full name and list any email addresses that appear more than once.
|
4
|
+
|
5
|
+
NOTE: It may later be extended to process other file types such as CSV, PDF, and plain text.
|
6
|
+
|
7
|
+
## Prerequisites
|
8
|
+
|
9
|
+
## Installation
|
10
|
+
|
11
|
+
Install from source:
|
12
|
+
```
|
13
|
+
gem "data_parser", :git => "https://github.com/rolandoalvarado/data_parser.git"
|
14
|
+
```
|
15
|
+
|
16
|
+
Alternatively, clone or download the repository and require it from your own Ruby file.
|
17
|
+
|
18
|
+
NOTE: Once this is published as a gem, you can install it with:
|
19
|
+
```
|
20
|
+
gem install data_parser
|
21
|
+
```
|
22
|
+
|
23
|
+
## Usage
|
24
|
+
|
25
|
+
Searching for Partial or Full Name:
|
26
|
+
```
|
27
|
+
path = '<json file path>'
|
28
|
+
full_name = 'Mich'
|
29
|
+
result = DataParser.parse(path:).process(params: { full_name: }).names
|
30
|
+
```
|
31
|
+
|
32
|
+
Showing duplicate emails:
|
33
|
+
```
|
34
|
+
path = '<json file path>'
|
35
|
+
result = DataParser.parse(path:).process.emails
|
36
|
+
```
|
37
|
+
|
38
|
+
Using in a Ruby File:
|
39
|
+
1. Add this to your code:
|
40
|
+
```
|
41
|
+
require_relative 'lib/data_parser.rb'
|
42
|
+
```
|
43
|
+
2. Make ruby file executable
|
44
|
+
```
|
45
|
+
chmod +x bin/data_parser_runner.rb
|
46
|
+
```
|
47
|
+
3. Execute the file:
|
48
|
+
```
|
49
|
+
./bin/data_parser_runner.rb
|
50
|
+
```
|
51
|
+
|
52
|
+
Running the test:
|
53
|
+
```
|
54
|
+
rake test
|
55
|
+
```
|
56
|
+
|
57
|
+
## License
|
58
|
+
|
59
|
+
See [LICENSE](LICENSE) for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Interactive command-line front end for DataParser.
#
# Prompts for a JSON file path and a partial or full name, prints the records
# whose name matches, then optionally prints the email addresses that appear
# more than once in the file.

require 'json'
require_relative '../lib/data_parser.rb'

# Reads one line of user input without its trailing newline.
# - $stdin is read explicitly because Kernel#gets would read from files named
#   in ARGV if the script were started with arguments.
# - gets returns nil at end of input (Ctrl-D, exhausted pipe); to_s maps that
#   to an empty string instead of raising NoMethodError on nil.chomp.
read_line = -> { $stdin.gets.to_s.chomp }

print 'Enter JSON File path: '

path = read_line.call
extension = File.extname(path)

raise "File is invalid! (#{extension.inspect})" unless extension == '.json'

puts 'Enter Partial or Full Name : '

full_name = read_line.call

result = DataParser.parse(path:).process(params: { full_name: }).names

puts "Result : #{JSON.pretty_generate(result)}"

puts 'Do you want to show duplicate email(s)? '

answer = read_line.call
# Anything other than "yes" (case-insensitive) ends the session.
if answer.downcase != 'yes'
  puts 'You have not answered: `Yes`. Application is terminated! Thanks!'
  exit
end

emails = DataParser.parse(path:).process.emails
puts "Email that has duplicate : #{JSON.pretty_generate(emails)}"
|
data/data_parser.gemspec
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# Gem specification for data_parser.

# Put lib/ on the load path so the version constant can be read while the
# gem is being built, before it is installed.
$LOAD_PATH.push File.expand_path('lib', __dir__)
require 'data_parser/version'

Gem::Specification.new do |s|
  s.name        = 'data_parser'
  s.version     = DataParser::VERSION
  s.authors     = ['Rolando Alvarado']
  s.email       = ['rorroland@gmail.com']
  s.homepage    = 'https://github.com/rolandoalvarado/data_parser'
  s.summary     = 'Process data from a json file.'
  s.description = 'DataParser is a simple code for parsing a json file with search capability ' \
                  'for partial or full name and showing of duplicate email if there is.'

  # The bundled LICENSE file carries the MIT licence text.
  s.license = 'MIT'

  # The library uses the hash/keyword shorthand (`path:`), which needs Ruby 3.1.
  s.required_ruby_version = '>= 3.1'

  s.files         = `git ls-files`.split("\n")
  # A literal `{spec}` is not brace-expanded and matched no files, leaving
  # test_files empty; list the spec directory directly.
  s.test_files    = `git ls-files -- spec/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ['lib']
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'ostruct'
|
3
|
+
|
4
|
+
module DataParser
  # Reads a JSON file holding an array of record objects and answers two
  # questions about it: which records have a "full_name" that starts with a
  # given term, and which "email" values occur more than once.
  class Json
    attr_reader :path

    # @param path [String] location of the JSON file
    # @raise [DataParser::PathMissing] if +path+ is nil
    # @raise [DataParser::FileNotFound] if nothing exists at +path+
    def initialize(path:)
      raise DataParser::PathMissing if path.nil?
      # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
      raise DataParser::FileNotFound unless File.exist?(path)

      @path = path
    end

    # Searches the records and collects duplicate emails in one pass over the file.
    #
    # @param params [Hash] the value of its first entry is used as the search
    #   term (symbol or string key); an empty hash or a nil/empty term matches
    #   every record that has a string "full_name"
    # @return [OpenStruct] +names+ is the array of matching record hashes,
    #   +emails+ the array of email values that occur more than once
    # @raise [DataParser::EmptyString] if +params+ is an empty String
    def process(params: {})
      raise DataParser::EmptyString if params.is_a?(String) && params.empty?

      # Take the first value directly so string-keyed hashes work as well as
      # symbol-keyed ones.
      search_string = (params || {}).values.first
      emails = records.map { |record| record['email'] }.compact

      OpenStruct.new(
        names: matching_records(search_string:),
        emails: get_duplicates(array: emails)
      )
    end

    private

    # Records whose "full_name" starts with +search_string+, ignoring case.
    def matching_records(search_string:)
      # Escape the term so characters such as "(" or "." are matched literally
      # instead of being parsed as regex syntax; \A anchors at the start of the
      # whole string rather than at the start of any line.
      pattern = /\A#{Regexp.escape(search_string.to_s)}/i

      records.select do |record|
        name = record['full_name']
        name.is_a?(String) && name.match?(pattern)
      end
    end

    # Values that appear more than once in +array+, in order of first appearance.
    def get_duplicates(array:)
      array.tally.select { |_value, count| count > 1 }.keys
    end

    # Parsed file content, read once and cached for later calls.
    # Only JSON objects are kept; any other top-level shape yields no records.
    def records
      @records ||= Array(JSON.parse(File.read(path))).grep(Hash)
    end
  end
end
|
data/lib/data_parser.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# Load the version constant with the library so DataParser::VERSION is
# defined for anyone who simply requires 'data_parser'.
require_relative 'data_parser/version'
require_relative 'data_parser/json'

# Namespace and entry point of the library.
module DataParser
  extend self

  # Default exception class for DataParser.
  class Exception < ::StandardError
  end

  # Exception that is raised if file is not found.
  class FileNotFound < DataParser::Exception
  end

  # Exception that is raised if path is empty.
  class PathMissing < DataParser::Exception
  end

  # Exception that is raised if param is an empty string.
  class EmptyString < DataParser::Exception
  end

  # Builds a parser for the JSON file at +path+.
  #
  # @param path [String] location of the JSON file
  # @return [DataParser::Json]
  def self.parse(path:)
    DataParser::Json.new(path:)
  end
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
[
|
2
|
+
{
|
3
|
+
"id": 2,
|
4
|
+
"full_name": "Jane Smith",
|
5
|
+
"email": "jane.smith@yahoo.com"
|
6
|
+
},
|
7
|
+
{
|
8
|
+
"id": 3,
|
9
|
+
"full_name": "Alex Johnson",
|
10
|
+
"email": "alex.johnson@hotmail.com"
|
11
|
+
},
|
12
|
+
{
|
13
|
+
"id": 4,
|
14
|
+
"full_name": "Michael Williams",
|
15
|
+
"email": "michael.williams@outlook.com"
|
16
|
+
},
|
17
|
+
{
|
18
|
+
"id": 5,
|
19
|
+
"full_name": "Emily Brown",
|
20
|
+
"email": "emily.brown@aol.com"
|
21
|
+
},
|
22
|
+
{
|
23
|
+
"id": 6,
|
24
|
+
"full_name": "William Davis",
|
25
|
+
"email": "william.davis@icloud.com"
|
26
|
+
},
|
27
|
+
{
|
28
|
+
"id": 7,
|
29
|
+
"full_name": "Olivia Miller",
|
30
|
+
"email": "olivia.miller@protonmail.com"
|
31
|
+
},
|
32
|
+
{
|
33
|
+
"id": 8,
|
34
|
+
"full_name": "James Wilson",
|
35
|
+
"email": "james.wilson@yandex.com"
|
36
|
+
},
|
37
|
+
{
|
38
|
+
"id": 9,
|
39
|
+
"full_name": "Ava Taylor",
|
40
|
+
"email": "ava.taylor@mail.com"
|
41
|
+
},
|
42
|
+
{
|
43
|
+
"id": 10,
|
44
|
+
"full_name": "Michael Brown",
|
45
|
+
"email": "michael.brown@inbox.com"
|
46
|
+
},
|
47
|
+
{
|
48
|
+
"id": 11,
|
49
|
+
"full_name": "Sophia Garcia",
|
50
|
+
"email": "sophia.garcia@zoho.com"
|
51
|
+
},
|
52
|
+
{
|
53
|
+
"id": 12,
|
54
|
+
"full_name": "Emma Lopez",
|
55
|
+
"email": "emma.lopez@protonmail.ch"
|
56
|
+
},
|
57
|
+
{
|
58
|
+
"id": 13,
|
59
|
+
"full_name": "Liam Martinez",
|
60
|
+
"email": "liam.martinez@fastmail.fm"
|
61
|
+
},
|
62
|
+
{
|
63
|
+
"id": 14,
|
64
|
+
"full_name": "Isabella Rodriguez",
|
65
|
+
"email": "isabella.rodriguez@me.com"
|
66
|
+
},
|
67
|
+
{
|
68
|
+
"id": 15,
|
69
|
+
"full_name": "Another Jane Smith",
|
70
|
+
"email": "jane.smith@yahoo.com"
|
71
|
+
}
|
72
|
+
]
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require 'spec_helper'

# Specs for DataParser::Json, run against the spec/data/clients.json fixture.
RSpec.describe DataParser::Json do
  subject { described_class.new(path:) }

  let(:path) { data_path('clients.json') }

  describe '#initialize' do
    context 'with arguments' do
      it 'initializes arguments and responds to a method' do
        expect(subject).to respond_to(:process)
      end
    end

    context 'with no arguments' do
      it 'raises ArgumentError' do
        # path: is a required keyword, so a call with nothing at all must fail.
        expect { described_class.new }.to raise_error(ArgumentError)
      end
    end

    context 'when path is empty' do
      it 'raises DataParser::PathMissing' do
        expect { described_class.new(path: nil) }.to raise_error(DataParser::PathMissing)
      end
    end

    context 'when file does not exist' do
      # Deliberately misspelled file name: no such fixture exists.
      let(:path) { data_path('client.json') }

      it 'raises DataParser::FileNotFound' do
        expect { described_class.new(path:) }.to raise_error(DataParser::FileNotFound)
      end
    end
  end

  describe '#process' do
    context 'when param is an empty string' do
      it 'raises DataParser::EmptyString' do
        expect { subject.process(params: '') }.to raise_error(DataParser::EmptyString)
      end
    end

    context 'when searching for partially matching name' do
      let(:full_name) { 'Mich' }

      it 'returns an array of matching names' do
        expect(subject.process(params: { full_name: }).names).to eq(
          [
            {
              'email' => 'michael.williams@outlook.com',
              'full_name' => 'Michael Williams',
              'id' => 4
            },
            {
              'email' => 'michael.brown@inbox.com',
              'full_name' => 'Michael Brown',
              'id' => 10
            }
          ]
        )
      end
    end

    context 'with duplicate emails' do
      it 'returns an array of emails having duplicate' do
        expect(subject.process.emails).to eq(%w(jane.smith@yahoo.com))
      end
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'spec_helper'

# Specs for the DataParser entry point, run against spec/data/clients.json.
# RSpec.describe is used so the file does not depend on the globally
# monkey-patched `describe`.
RSpec.describe DataParser do
  subject { DataParser.parse(path:) }

  let(:path) { data_path('clients.json') }

  it 'does have a VERSION constant' do
    expect(DataParser.const_defined?('VERSION')).to be(true)
  end

  describe '.parse' do
    it 'returns an instance of DataParser::Json' do
      expect(DataParser.parse(path: data_path('clients.json'))).to be_an_instance_of(DataParser::Json)
    end
  end

  # Exercises the parser object returned by .parse.
  describe '.process' do
    context 'when searching for partially matching name' do
      let(:full_name) { 'Mich' }

      it 'returns an array of matching names' do
        expect(subject.process(params: { full_name: }).names).to eq(
          [
            {
              'email' => 'michael.williams@outlook.com',
              'full_name' => 'Michael Williams',
              'id' => 4
            },
            {
              'email' => 'michael.brown@inbox.com',
              'full_name' => 'Michael Brown',
              'id' => 10
            }
          ]
        )
      end
    end

    context 'with duplicate emails' do
      it 'returns an array of emails having duplicate' do
        expect(subject.process.emails).to eq(%w(jane.smith@yahoo.com))
      end
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require 'benchmark'
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bundler/setup'
|
5
|
+
require 'rbconfig'
|
6
|
+
|
7
|
+
require 'data_parser'
|
8
|
+
|
9
|
+
# Path helpers for the specs. Locations are resolved from this file's
# directory, so they do not depend on where the test run is started.
module FileHelpers
  # Asks the external `identify` command for the size of the file at +path+.
  # NOTE(review): presumably ImageMagick's identify; nothing in this gem's
  # specs calls this helper - confirm it is still needed.
  #
  # @return [Array<Float>] the "WxH" output split on "x" and converted to floats
  def dimensions_for_path(path)
    # The argument-array form runs the command without a shell, so spaces or
    # shell metacharacters in +path+ cannot change the command line.
    IO.popen(['identify', '-format', '%wx%h', path.to_s], &:read).strip.split('x').map(&:to_f)
  end

  # Absolute path of fixture +name+ inside the "data" directory next to this file.
  def data_path(name)
    File.join(File.expand_path('data', __dir__), name)
  end

  # Absolute path of the scratch directory (a "tmp" sibling of this file's
  # directory), created on first use.
  def tmp_dir
    path = File.expand_path('../tmp', __dir__)
    Dir.mkdir(path) unless File.directory?(path)
    path
  end

  # Absolute path of +name+ inside the scratch directory.
  def tmp_path(name)
    File.join(tmp_dir, name)
  end
end
|
29
|
+
|
30
|
+
RSpec.configure do |config|
  config.include(FileHelpers)

  # Examples tagged `windows: true` are skipped unless the host OS name looks
  # like a Windows variant.
  on_windows = RbConfig::CONFIG['host_os'].match?(/mswin|mingw|cygwin/)
  config.filter_run_excluding windows: true unless on_windows
end
|
metadata
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: data_parser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Rolando Alvarado
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2023-08-07 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: DataParser is a simple code for parsing a json file with search capabilty
|
14
|
+
for partial or full name and showing of duplicate email if there is.
|
15
|
+
email:
|
16
|
+
- rorroland@gmail.com
|
17
|
+
executables:
|
18
|
+
- data_parser_runner.rb
|
19
|
+
extensions: []
|
20
|
+
extra_rdoc_files: []
|
21
|
+
files:
|
22
|
+
- ".gitignore"
|
23
|
+
- Gemfile
|
24
|
+
- LICENSE
|
25
|
+
- README.md
|
26
|
+
- Rakefile
|
27
|
+
- bin/data_parser_runner.rb
|
28
|
+
- data_parser.gemspec
|
29
|
+
- lib/data_parser.rb
|
30
|
+
- lib/data_parser/json.rb
|
31
|
+
- lib/data_parser/version.rb
|
32
|
+
- spec/data/clients.json
|
33
|
+
- spec/lib/data_parser/json_spec.rb
|
34
|
+
- spec/lib/data_parser_spec.rb
|
35
|
+
- spec/spec_helper.rb
|
36
|
+
homepage: https://github.com/rolandoalvarado/data_parser
|
37
|
+
licenses: []
|
38
|
+
metadata: {}
|
39
|
+
post_install_message:
|
40
|
+
rdoc_options: []
|
41
|
+
require_paths:
|
42
|
+
- lib
|
43
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
49
|
+
requirements:
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: '0'
|
53
|
+
requirements: []
|
54
|
+
rubygems_version: 3.4.5
|
55
|
+
signing_key:
|
56
|
+
specification_version: 4
|
57
|
+
summary: Process data from a json file.
|
58
|
+
test_files: []
|