simple_xml_parser 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 45afd6e422ed64eafb1230a18868283a63829269bb2832a3d593414bcf9c86c4
4
+ data.tar.gz: 309bd5a23cbb2548beba4d4665c2b6f5314efa9e504c22548c3d12ece5d8cb46
5
+ SHA512:
6
+ metadata.gz: 94ebcd0970acbac78dab7eb6c7efaa89d7686013b6c03aace174f3c99f42ca3cefcabf6f870759ba6542c567c6dc50221b0bb2a1381e25cd2f9128706871bd5d
7
+ data.tar.gz: 654c05a9d1b30f6dd321c91915787ed747aa5e56b21959b96e61c9bac79e23e9244cc0903d6bc211c5f2c09d9ff0d4def20f757396beb36b64400f2f79cc8ce0
@@ -0,0 +1,11 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ # rspec failure tracking
11
+ .rspec_status
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
@@ -0,0 +1,6 @@
1
+ ---
2
+ language: ruby
3
+ cache: bundler
4
+ rvm:
5
+ - 2.7.0
6
+ before_install: gem install bundler -v 2.1.2
@@ -0,0 +1,3 @@
1
+ ### v0.1.0
2
+
3
+ * Copied files from fide_xml_parser gem/repo and modified them.
data/Gemfile ADDED
@@ -0,0 +1,7 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in simple_xml_parser.gemspec
4
+ gemspec
5
+
6
+ gem "rake", "~> 12.0"
7
+ gem "rspec", "~> 3.0"
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2020 Keith Bennett
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,40 @@
1
+ # SimpleXmlParser
2
+
3
+ SimpleXmlParser parses XML containing an array of records into an array of Ruby hashes. The library code lives under `lib/simple_xml_parser`. To experiment with it, run `bin/console` for an interactive prompt.
4
+
5
+ `SimpleXmlParser::FideXmlParser` is included as an example of subclassing `SimpleXmlParser::Parser` for a specific data file; it parses the chess player/rating "combined" XML files provided at https://ratings.fide.com/download_lists.phtml.
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'simple_xml_parser'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ $ bundle install
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install simple_xml_parser
22
+
23
+ ## Usage
24
+
25
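+ Create a `SimpleXmlParser::Parser` with the `array_name` and `record_name` used in your XML, call `parse(filespec)` to get an array of hashes, and, if you want JSON output, pass that array to `SimpleXmlParser::JsonWriter.new(records)` and call `write(filespec)`.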
26
+
27
+ ## Development
28
+
29
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
30
+
31
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
32
+
33
+ ## Contributing
34
+
35
+ Bug reports and pull requests are welcome on GitHub at https://github.com/keithrbennett/simple_xml_parser.
36
+
37
+
38
+ ## License
39
+
40
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
@@ -0,0 +1,13 @@
1
+ require "simple_xml_parser/version"
2
+
3
# Top-level namespace of the gem. Loading this file requires every Ruby source
# file found under the simple_xml_parser directory next to it.
module SimpleXmlParser

  # Error class raised by this gem. It is defined before the library files are
  # required so that the constant already exists while they are being loaded.
  class Error < StandardError; end

  # Only the gem's own subdirectory is globbed, so this entry file does not
  # try to require itself. The list is sorted because Dir[] does not guarantee
  # any particular order on older Rubies, and an explicit sort keeps the load
  # order the same on every platform.
  Dir[File.join(__dir__, 'simple_xml_parser', '**', '*.rb')].sort.each do |ruby_file|
    require ruby_file
  end
end
@@ -0,0 +1,36 @@
1
+ require_relative 'parser'
2
+
3
module SimpleXmlParser

  # This class is provided as an example of how the Parser can be subclassed
  # to implement specialized behavior for your data file.
  #
  # It parses chess player/rating "combined" XML files provided at
  # https://ratings.fide.com/download_lists.phtml.
  class FideXmlParser < Parser

    # Names of the fields whose values are converted to integers.
    # These field names will work even if field_name_renames are provided,
    # because they are accessed before the renaming is done.
    # Both the strings and the array itself are frozen so the shared constant
    # cannot be modified by accident.
    INTEGER_FIELDS = %w[
      birthday
      k
      blitz_k
      rapid_k
      rating
      blitz_rating
      rapid_rating
      games
      blitz_games
      rapid_games
    ].map(&:freeze).freeze

    # All three parameters are optional and are handed to Parser unchanged,
    # together with the array name, record name and integer fields that are
    # fixed for this file format.
    def initialize(key_filter: nil, record_filter: nil, field_name_renames: nil)
      super(array_name:         'playerslist',
            record_name:        'player',
            integer_fields:     INTEGER_FIELDS,
            key_filter:         key_filter,
            record_filter:      record_filter,
            field_name_renames: field_name_renames)
    end
  end
end
@@ -0,0 +1,63 @@
1
+ require 'json'
2
+
3
module SimpleXmlParser

  # Writes the records it was constructed with to JSON files. The name of each
  # JSON file is derived from the name of an XML input file unless it is given
  # explicitly.
  class JsonWriter

    # Matches a ".xml" extension at the very end of a filespec, ignoring case.
    XML_EXTENSION_REGEX = /\.xml\z/i

    attr_reader :records

    # records: the object to serialize (it must respond to `to_json` and `size`).
    def initialize(records)
      @records = records
    end


    # Checks all input filespecs before processing the first one.
    # Verifies not nil, ends in ".xml" (case insensitive), and exists as a file.
    # Accepts a single filespec or an array of them.
    # Raises a RuntimeError listing every filespec that failed the check.
    def validate_input_filespecs(filespecs)
      bad_filespecs = Array(filespecs).select do |filespec|
        filespec.nil? || XML_EXTENSION_REGEX.match(filespec).nil? || !File.file?(filespec)
      end
      if bad_filespecs.any?
        raise "The following filespecs were not valid XML filespecs: #{bad_filespecs.join(', ')}"
      end
    end


    # Public entry point to write a single JSON file.
    # input_filespec: filespec of one XML file; use `write_multiple` for an array.
    # json_mode: :pretty for human readable JSON, anything else for compact JSON
    # json_filespec: name of the file to write; by default it is constructed
    #   from the input filespec by replacing the '.xml' extension with '.json'.
    # Raises Error (SimpleXmlParser::Error) if an array is passed.
    def write(input_filespec, json_mode: :pretty, json_filespec: nil)
      if input_filespec.is_a?(Array)
        raise Error, "This method is used only for single files, use write_multiple for multiple files."
      end

      validate_input_filespecs([input_filespec])
      write_private(input_filespec, json_mode: json_mode, json_filespec: json_filespec)
    end


    # Public entry point to write multiple files.
    # All filespecs are validated before the first file is written.
    # json_mode: :pretty for human readable JSON, anything else for compact JSON
    def write_multiple(input_filespecs, json_mode: :pretty)
      validate_input_filespecs(input_filespecs)
      input_filespecs.each do |input_filespec|
        write_private(input_filespec, json_mode: json_mode)
      end
    end


    private

    # Implementation for writing a single file.
    # Separated from the public `write` method in order to validate filespecs only once.
    # Default json_filespec will be constructed from the input filespec by
    # replacing the '.xml' extension (any case) with '.json'.
    # Prints a one-line summary to stdout after the file has been written.
    def write_private(input_filespec, json_mode: :pretty, json_filespec: nil)
      json_text = (json_mode == :pretty) ? JSON.pretty_generate(records) : records.to_json
      json_filespec ||= input_filespec.sub(XML_EXTENSION_REGEX, '.json')
      File.write(json_filespec, json_text)
      puts "#{records.size} records processed, #{input_filespec} --> #{json_filespec}"
    end
  end
end
1
+ require 'awesome_print'
2
+ require 'nokogiri'
3
+
4
module SimpleXmlParser

  # SAX document handler that turns XML consisting of an array element
  # (array_name) containing record elements (record_name) into an array of
  # Ruby hashes, one per record, keyed by field name.
  #
  # A field_name_renames hash can be provided.
  # Keys are the field names in the XML input, values are the names in the output, e.g.:
  # {
  #   'rating' => 'standard_rating',
  #   'games'  => 'standard_games'
  # }
  #
  # Supports key and record filters:
  #
  # For key filter, pass a lambda that takes a key name as a parameter
  # and returns true to include it, false to exclude it,
  # e.g. to exclude 'foo' and 'bar', do this:
  #   parser.key_filter = ->(key) { ! %w(foo bar).include?(key) }
  # The key filter receives the field name as it appears in the XML input.
  #
  # For record filter, pass a lambda that takes a record (a Hash) as a parameter,
  # and returns true to include it or false to exclude it,
  # e.g. to include only records with a "title", do this:
  #   parser.record_filter = ->(rec) { rec['title'] }
  # If a field name has been changed via the field_name_renames hash, the new
  # name should be used in the record filter.
  class Parser < Nokogiri::XML::SAX::Document

    attr_reader :start_time

    # Constructor parameters:
    attr_reader :array_name, :record_name, :integer_fields

    # User-provided callbacks:
    attr_accessor :key_filter, :record_filter, :field_name_renames

    # For internal use:
    attr_accessor :current_property_name, :record, :records, :input_record_count, :output_record_count

    ANSI_GO_TO_LINE_START = "\e[1G"

    # array_name:         name of the element that encloses all records
    # record_name:        name of the element that encloses one record
    # integer_fields:     input field names whose values are converted to Integer (optional)
    # key_filter, record_filter, field_name_renames: see the class comment (all optional)
    def initialize(array_name:, record_name:, integer_fields: nil,
                   key_filter: nil, record_filter: nil, field_name_renames: nil)
      @array_name = array_name
      @record_name = record_name
      @integer_fields = integer_fields
      @key_filter = key_filter
      @record_filter = record_filter
      @field_name_renames = field_name_renames
      @current_property_name = nil
      @current_text = nil # text collected so far for the field being read
      @record = {}
      @records = []
      @start_time = current_time
      @input_record_count = 0
      @output_record_count = 0
    end


    # Parses the data source and returns the array of records.
    # A String is treated as the filespec of a file to read; that file is
    # closed again when parsing ends, even if parsing raises. Any other object
    # is passed to the Nokogiri SAX parser as is.
    def parse(data_source)
      parser = Nokogiri::XML::SAX::Parser.new(self)
      if data_source.is_a?(String)
        File.open(data_source) { |file| parser.parse(file) }
      else
        parser.parse(data_source)
      end
      records
    end


    # Monotonic clock reading, used only to compute elapsed seconds.
    def current_time
      Process.clock_gettime(Process::CLOCK_MONOTONIC)
    end


    # Rewrites the current terminal line with the progress counters.
    def output_status
      print ANSI_GO_TO_LINE_START
      print "Records processed: %9d kept: %9d Seconds elapsed: %11.2f" % [
        input_record_count,
        output_record_count,
        current_time - start_time
      ]
    end


    # SAX callback: an opening tag was read.
    def start_element(name, _attrs)
      case name
      when array_name
        # ignore
      when record_name
        self.input_record_count += 1
        output_status if input_record_count % 1000 == 0
      else # any other element is a field of the current record
        store_current_field # keep text read before a nested element starts
        self.current_property_name = name
      end
    end


    # SAX callback: a closing tag was read.
    def end_element(name)
      case name
      when array_name # end of data; print the final status line
        finish
      when record_name
        if record_filter.nil? || record_filter.(record)
          self.output_record_count += 1
          records << record
        end
        self.record = {}
      else
        store_current_field
        self.current_property_name = nil
      end
    end


    # Returns the output name for an input field name, applying
    # field_name_renames if one was provided.
    def output_field_name(input_field_name)
      return input_field_name if field_name_renames.nil?
      field_name_renames[input_field_name] || input_field_name
    end


    # Converts value to an Integer if field_name is one of integer_fields,
    # otherwise returns it unchanged. Strings are always read as base 10, so a
    # leading zero is not interpreted as an octal prefix.
    # Raises ArgumentError if the value is not a valid integer.
    def maybe_convert_to_integer(field_name, value)
      return value unless integer_fields&.include?(field_name)
      value.is_a?(String) ? Integer(value, 10) : Integer(value)
    end


    def include_this_field?(field_name)
      key_filter.nil? || key_filter.(field_name)
    end


    # SAX callback: text was read. The SAX parser may deliver the text of a
    # single element in several calls, so the pieces are only collected here;
    # the value is stored in the record once the element is complete.
    # Text outside of a field element is ignored.
    def characters(string)
      return unless current_property_name
      @current_text = @current_text ? @current_text << string : string.dup
    end


    # Called when the array element ends.
    def finish
      output_status
      puts
    end


    private

    # Stores the text collected for the current field in the current record,
    # applying the key filter, the field renames and the integer conversion.
    # Does nothing if no text was read, so an empty element adds no key.
    def store_current_field
      return if current_property_name.nil? || @current_text.nil?

      if include_this_field?(current_property_name)
        key = output_field_name(current_property_name)
        record[key] = maybe_convert_to_integer(current_property_name, @current_text)
      end
      @current_text = nil
    end
  end
end
@@ -0,0 +1,3 @@
1
module SimpleXmlParser
  # Version string of the gem. Frozen so the shared constant cannot be
  # modified in place.
  VERSION = "0.1.0".freeze
end
@@ -0,0 +1,33 @@
1
+ require_relative 'lib/simple_xml_parser/version'
2
+
3
# Gem specification for simple_xml_parser.
Gem::Specification.new do |spec|
  spec.name          = "simple_xml_parser"
  spec.version       = SimpleXmlParser::VERSION
  spec.authors       = ["Keith Bennett"]
  spec.email         = ["keithrbennett@gmail.com"]

  spec.summary       = %q{Parses simple (record array) XML text.}
  spec.description   = %q{Parses XML containing an array of records into an array of Ruby hashes.}
  spec.homepage      = "https://github.com/keithrbennett/simple_xml_parser"
  spec.license       = "MIT"
  spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")

  spec.metadata['allowed_push_host'] = "https://rubygems.org"

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = spec.homepage
  # Points at the changelog file itself rather than the repository home page.
  # NOTE(review): assumes the default branch is named "master" -- confirm.
  spec.metadata["changelog_uri"] = "#{spec.homepage}/blob/master/CHANGELOG.md"

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  spec.files         = Dir.chdir(File.expand_path('..', __FILE__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(bin|test|spec|features)/}) }
  end
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_dependency "nokogiri", "~> 1.10"
  # NOTE(review): pry does not appear to be required by the files under lib/;
  # it could presumably be a development dependency instead -- confirm.
  spec.add_dependency "pry", "~> 0.12"
  spec.add_dependency "awesome_print", "~> 1.8"
end
metadata ADDED
@@ -0,0 +1,103 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: simple_xml_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Keith Bennett
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2020-03-11 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.10'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.10'
27
+ - !ruby/object:Gem::Dependency
28
+ name: pry
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0.12'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0.12'
41
+ - !ruby/object:Gem::Dependency
42
+ name: awesome_print
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.8'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.8'
55
+ description: Parses XML containing an array of records into an array of Ruby hashes.
56
+ email:
57
+ - keithrbennett@gmail.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - ".gitignore"
63
+ - ".rspec"
64
+ - ".travis.yml"
65
+ - CHANGELOG.md
66
+ - Gemfile
67
+ - LICENSE.txt
68
+ - README.md
69
+ - Rakefile
70
+ - lib/simple_xml_parser.rb
71
+ - lib/simple_xml_parser/fide_xml_parser.rb
72
+ - lib/simple_xml_parser/json_writer.rb
73
+ - lib/simple_xml_parser/parser.rb
74
+ - lib/simple_xml_parser/version.rb
75
+ - simple_xml_parser.gemspec
76
+ homepage: https://github.com/keithrbennett/simple_xml_parser
77
+ licenses:
78
+ - MIT
79
+ metadata:
80
+ allowed_push_host: https://rubygems.org
81
+ homepage_uri: https://github.com/keithrbennett/simple_xml_parser
82
+ source_code_uri: https://github.com/keithrbennett/simple_xml_parser
83
+ changelog_uri: https://github.com/keithrbennett/simple_xml_parser
84
+ post_install_message:
85
+ rdoc_options: []
86
+ require_paths:
87
+ - lib
88
+ required_ruby_version: !ruby/object:Gem::Requirement
89
+ requirements:
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ version: 2.3.0
93
+ required_rubygems_version: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - ">="
96
+ - !ruby/object:Gem::Version
97
+ version: '0'
98
+ requirements: []
99
+ rubygems_version: 3.0.6
100
+ signing_key:
101
+ specification_version: 4
102
+ summary: Parses simple (record array) XML text.
103
+ test_files: []