ol_dump_parser 0.0.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 16b669a8a1373635cb95df90dcc0f1839fee37cee811a4f9fdd4cfbcc806cac6
4
- data.tar.gz: 664a046c478097ec475297eefe3fac6ffb24cc35afcdd07c5f5dc51c380bd86b
3
+ metadata.gz: 143bc6f339f1330854b00bec96a64fee9be7841ac8014f492980eb0e3c48acf7
4
+ data.tar.gz: e4c14464e5fa2de0ed9916265a0e4b4ecfca29b3730ac8bc761e12a3d1999987
5
5
  SHA512:
6
- metadata.gz: 87e9d3e172c985fa6715141ffb71584fe6ba14a1d95a45b65d560a4d91b5283c1fb8ab7525ae8a1c413408d15ca0c4f54430b9bb86441adfde454e092296211e
7
- data.tar.gz: 333b96344c009710ee077614724842ebca6c0b1c39127df25d6461510649fde86cdc8612cfee5aa5caaf1d77bea2522c2702b99bfe32700e4be4f27852aed117
6
+ metadata.gz: '0108a1fb35fcd2474dbd39435389e3cca4d8ef91121c83f105807d8d27369a885b86f34cdd4f1495b3445a9812f760b5e59f3dba858d5dd6eadec7b4c6dc7936'
7
+ data.tar.gz: 47947e3af61890ae90d91db956f5c18fd7e0390c4196ea2ace8d12cc4be4abb8ccc3d08dc85f3e3e33a357819df95f41f182997b5f11ee77e53890acd4d9021f
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2021 arjundas
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,19 @@
1
+ # Open Library Dump Parser
2
+
3
+ openlibrary-dump-parser is a simple Ruby parser that parses the CSV dump from Open Library into a JSON file.
4
+
5
+ ## Getting started
6
+ ### Installation
7
+ ```sh
8
+ gem install ol_dump_parser
9
+ ```
10
+
11
+ ### Ruby sample code:
12
+ ```ruby
13
+ require 'ol_dump_parser'
14
+ # Create an ol_dump_parser object
15
+ ol_dump_parser = OlDumpParser.new({ inp_file: './inp_file_path', out_file: './output/out.json' })
16
+
17
+ # Parse the data; this will create the output file
18
+ ol_dump_parser.parse
19
+ ```
@@ -1,5 +1,14 @@
1
- class OlDumpParser
2
- def self.hi
3
- puts 'hello world'
1
+ require_relative 'ol_dump_parser/parser'
2
+
3
+ # Serves as the namespace for the whole gem
4
+ module OlDumpParser
5
+ class << self
6
+
7
+ # Creates a new parser
8
+ # @param params [Hash{Symbol => String}] Input parameters to parser
9
+ def new(params)
10
+ Parser.new params
11
+ end
4
12
  end
5
13
  end
14
+
@@ -0,0 +1,50 @@
1
+ require 'csv'
2
+ require 'json'
3
+
4
+ module OlDumpParser
5
+ # Parser class that parses the Open Library dump
6
+ class Parser
7
+ # @return [String] Path to the input file
8
+ attr_accessor :inp_file
9
+
10
+ # @return [String] Path to the output file
11
+ attr_accessor :out_file
12
+
13
+ # Constructor for Parser
14
+ # @param [Hash] params Inputs to parser
15
+ # @option params [String] :inp_file Path to the input file
16
+ # @option params [String] :out_file Path to the output file
17
+ def initialize(params)
18
+ self.inp_file = params[:inp_file]
19
+ self.out_file = params[:out_file]
20
+ end
21
+
22
+ # Parses the input csv file and converts to a json file
23
+ def parse
24
+ output = File.open out_file, 'w'
25
+ File.open(inp_file).each do |line|
26
+ # CSV parse returns an array of rows - we need only the first
27
+ parsed_line = CSV.parse(line.chomp, col_sep: "\t", liberal_parsing: true).first
28
+ json_output = csv_row_to_json(parsed_line)
29
+ p json_output[:details]['name']
30
+ output.puts json_output
31
+ end
32
+ output.close
33
+ end
34
+
35
+ private
36
+
37
+ # Convert the given row values to a json object
38
+ # @param row_values [Array] Csv row values
39
+ # @return [Hash] Json value
40
+ def csv_row_to_json(row_values)
41
+ {
42
+ type: row_values[0],
43
+ key: row_values[1],
44
+ revision: row_values[2],
45
+ last_modified: row_values[3],
46
+ details: JSON.parse(row_values[4])
47
+ }
48
+ end
49
+ end
50
+ end
@@ -0,0 +1,3 @@
1
+ module OlDumpParser
2
+ VERSION = '1.0.0'
3
+ end
metadata CHANGED
@@ -1,24 +1,56 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ol_dump_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Arjun M Das
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-03-27 00:00:00.000000000 Z
12
- dependencies: []
13
- description: " Open Library provides dumps of all its data, generated every month.
14
- ol_dump_parser will act as a simple parser for this data.\n"
11
+ date: 2021-04-03 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rubocop
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rubocop-performance
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: " ol_dump_parser parses the Open Library Dump into a simple json file
42
+ for easy processing\n"
15
43
  email: arjundas.27586@gmail.com
16
44
  executables: []
17
45
  extensions: []
18
46
  extra_rdoc_files: []
19
47
  files:
48
+ - LICENSE
49
+ - README.md
20
50
  - lib/ol_dump_parser.rb
21
- homepage:
51
+ - lib/ol_dump_parser/parser.rb
52
+ - lib/ol_dump_parser/version.rb
53
+ homepage: https://github.com/thearjunmdas/openlibrary-dump-parser
22
54
  licenses:
23
55
  - MIT
24
56
  metadata: {}