diffxml 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.DS_Store +0 -0
- data/README.md +4 -2
- data/diffxml.gemspec +1 -1
- data/lib/diffxml.rb +6 -2
- data/rspec/diffXML_spec.rb +7 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 26d733a1a7c9f08ef8c081456aacb00972a54558
|
|
4
|
+
data.tar.gz: a8d24b26f97175818bd199436c3df76b2dbc962a
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f5578ee34dd1776e2dcdc9ab9b1afacba42208702b6f858edd7d7de3be98f274627640289cb2e691f490a9d1cf46e1ed757aaca049b34a246ac136d2502b59fc
|
|
7
|
+
data.tar.gz: 4ba3c68fba21b859e1985445b826cb093685926998eb32fc9da3fa0e0d18b5a146baa4a5da028001218d9e9d4e22078491e733672d3cda8a2ac603c0b4eb0d4f
|
data/.DS_Store
CHANGED
|
Binary file
|
data/README.md
CHANGED
|
@@ -39,12 +39,14 @@ the returned value will be an array with the XPaths of all nodes that were not m
|
|
|
39
39
|
* Plans to return the values of both nodes that are at the XPath in the array, as well as the XPath location are in the works.
|
|
40
40
|
* General upkeep and a more rigorous test set are also planned.
|
|
41
41
|
* RDoc implementation for documentation.
|
|
42
|
-
* optimize searches
|
|
42
|
+
* optimize searches: memory handling has been improved; however, the search still does not differentiate between nodes that share the same path, so XML documents whose elements appear in a different order may report false negatives (untested)
|
|
43
|
+
because it just compares the string of the node set as opposed to comparing each node in the set individually.
|
|
43
44
|
* Add ignore capabilities for XPaths
|
|
44
45
|
* Refactor Utility methods into separate file
|
|
45
46
|
|
|
46
47
|
## Known Issues
|
|
47
|
-
* With large XML documents, specifically with tested documents over 1500 elements, but possibly fewer, the gem will reach a point where it cannot allocate memory
|
|
48
|
+
* ~~With large XML documents, specifically with tested documents over 1500 elements, but possibly fewer, the gem will reach a point where it cannot allocate memory.~~
|
|
49
|
+
* Fixed in the latest commit; the fix will be applied with the version 0.2.0 release. Optimization of the compare is still needed. HUGE increase in speed when only collecting namespaces once: from 5500 seconds to 55 seconds!
|
|
48
50
|
|
|
49
51
|
## Contributing
|
|
50
52
|
|
data/diffxml.gemspec
CHANGED
data/lib/diffxml.rb
CHANGED
|
@@ -3,12 +3,16 @@ require 'Nokogiri'
|
|
|
3
3
|
module DiffXML
|
|
4
4
|
@xpathArray = []
|
|
5
5
|
def self.compareXML(doc1, doc2)
|
|
6
|
+
@namespaces = doc1.collect_namespaces
|
|
6
7
|
if doc1.class == Nokogiri::XML::Document
|
|
7
8
|
collectXPaths(doc1.root)
|
|
8
9
|
else
|
|
9
10
|
collectXPaths(doc1)
|
|
10
11
|
end
|
|
11
|
-
@xpathArray.delete_if
|
|
12
|
+
@xpathArray.delete_if.with_index do |element, i|
|
|
13
|
+
puts "iteration #{i} and #{element}"
|
|
14
|
+
compareToPath(element, doc1, doc2)
|
|
15
|
+
end
|
|
12
16
|
end
|
|
13
17
|
|
|
14
18
|
def self.getPath(node, path = nil)
|
|
@@ -24,7 +28,7 @@ module DiffXML
|
|
|
24
28
|
end
|
|
25
29
|
|
|
26
30
|
def self.compareToPath(path, doc1, doc2)
|
|
27
|
-
doc2.search(path,
|
|
31
|
+
doc2.search(path, @namespaces).to_s == doc1.search(path, @namespaces).to_s
|
|
28
32
|
end
|
|
29
33
|
|
|
30
34
|
def self.collectXPaths(doc)
|
data/rspec/diffXML_spec.rb
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
require 'nokogiri'
|
|
2
2
|
require 'rspec'
|
|
3
3
|
require 'rspec/matchers'
|
|
4
|
-
require_relative '../lib/
|
|
4
|
+
require_relative '../lib/diffxml'
|
|
5
5
|
|
|
6
6
|
describe DiffXML do
|
|
7
7
|
xml1 = Nokogiri::XML("<doc xmlns='foo:bar'><first>foo bar baz</first><second>things</second><third>
|
|
@@ -41,4 +41,10 @@ describe DiffXML do
|
|
|
41
41
|
expect(DiffXML.getXPathArray[0].to_s).to eql 'doc/third/firstthird/finalChild'
|
|
42
42
|
end
|
|
43
43
|
|
|
44
|
+
it 'should handle large xml documents without running out of memory' do
|
|
45
|
+
largeXML1 = Nokogiri::XML(File.read('./wikimediaxml_test.xml'))
|
|
46
|
+
largeXML2 = Nokogiri::XML(File.read('./wikimediaxml_test.xml'))
|
|
47
|
+
expect(DiffXML.compareXML(largeXML1,largeXML2).size).to be 0
|
|
48
|
+
end
|
|
49
|
+
|
|
44
50
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: diffxml
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.3
|
|
4
|
+
version: 0.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Jake Bubnar
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2016-03-
|
|
11
|
+
date: 2016-03-24 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|