modsulator 1.0.5 → 1.0.7

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -1,150 +0,0 @@
1
- RSpec.describe Normalizer do
2
- before :all do
3
- # Create common variables shared across tests
4
- @fixtures_dir = File.expand_path("../../fixtures", __FILE__)
5
- @normalizer = Normalizer.new
6
- end
7
-
8
-
9
- describe "clean_text" do
10
- it "formats text blocks correctly" do
11
- bad_string = " This is some text with more
12
-
13
-
14
- than one
15
-
16
-
17
- problem
18
-
19
- inside
20
-
21
- "
22
- expect(@normalizer.clean_text(bad_string)).to eq("This is some text with more than one problem inside")
23
- end
24
-
25
- it "returns nil given a nil input" do
26
- expect(@normalizer.clean_text(nil)).to eq(nil)
27
- end
28
-
29
- it "returns nil given an empty string" do
30
- expect(@normalizer.clean_text("")).to eq(nil)
31
- end
32
-
33
- it "returns an empty string given an input that is only whitespace" do
34
- expect(@normalizer.clean_text(" ")).to eq("")
35
- end
36
- end
37
-
38
-
39
- describe "exceptional?" do
40
- it "returns false for a nil input" do
41
- expect(@normalizer.exceptional?(nil)).to be_falsey
42
- end
43
-
44
- it "returns false for an element that does not have any attributes" do
45
- no_attributes_doc = Nokogiri::XML("<root_node><typeOfResource>randomtext</typeOfResource></root_node>")
46
- expect(@normalizer.exceptional?(no_attributes_doc.root.children[0])).to be_falsey
47
- end
48
-
49
- it "returns true for an element that matches the condition" do
50
- exceptional_doc = Nokogiri::XML("<root_node><typeOfResource collection=\"yes\">randomtext</typeOfResource></root_node>")
51
- expect(@normalizer.exceptional?(exceptional_doc.root.children[0])).to be_truthy
52
- end
53
- end
54
-
55
-
56
- describe "trim_text" do
57
- it "raises an exception given a nil input" do
58
- # binding.pry
59
- expect { @normalizer.trim_text(nil) }.to raise_error(NoMethodError)
60
- end
61
-
62
- it "returns the tree unchanged, given a tree that contains no text" do
63
- no_text_doc = Nokogiri::XML("<root_node><child1 att=\"val\"><child_2></child_2><child3/></child1><child4><child5 att=\"abc\"><child_6/></child5></child4></root_node>")
64
- original = Nokogiri::XML("<root_node><child1 att=\"val\"><child_2></child_2><child3/></child1><child4><child5 att=\"abc\"><child_6/></child5></child4></root_node>")
65
- @normalizer.trim_text(no_text_doc.root)
66
- expect(EquivalentXml.equivalent?(no_text_doc.root, original.root)).to be_truthy
67
- end
68
-
69
- it "correctly removes whitespace" do
70
- no_text_doc = Nokogiri::XML("<root_node><child1 att=\"val\"><child_2> TEXTING </child_2><child3/></child1><child4><child5 att=\"abc\"><child_6/></child5></child4></root_node>")
71
- correct_doc = Nokogiri::XML("<root_node><child1 att=\"val\"><child_2>TEXTING</child_2><child3/></child1><child4><child5 att=\"abc\"><child_6/></child5></child4></root_node>")
72
- @normalizer.trim_text(no_text_doc.root)
73
- expect(no_text_doc.to_s).to eq(correct_doc.to_s)
74
- end
75
- end
76
-
77
- describe "remove_empty_attributes" do
78
- it "raises an error given a null argument" do
79
- expect { @normalizer.remove_empty_attributes(nil) }.to raise_error(NoMethodError)
80
- end
81
-
82
- it "removes all empty attributes for a single node" do
83
- no_attributes_doc = Nokogiri::XML("<root_node><child_1_1/><child_1_2 at1=\"forward\" at2=\"\">Some text</child_1_2><child_1_3 bt1=\" \" bt2=\"forgery\" bt3=\"\"></child_1_3></root_node>")
84
- correct_attributes_doc = Nokogiri::XML("<root_node><child_1_1/><child_1_2 at1=\"forward\" at2=\"\">Some text</child_1_2><child_1_3 bt2=\"forgery\" ></child_1_3></root_node>")
85
- @normalizer.remove_empty_attributes(no_attributes_doc.root.children[2])
86
- expect(EquivalentXml.equivalent?(no_attributes_doc.root, correct_attributes_doc.root)).to be_truthy
87
- end
88
-
89
- it "removes all attributes for a node that has only empty attributes" do
90
- no_attributes_doc = Nokogiri::XML("<root_node><child_1_1/><child_1_2 at1=\"\" at2=\"\" bfk=\" \" r2d2=\" \">Some text</child_1_2></root_node>")
91
- correct_attributes_doc = Nokogiri::XML("<root_node><child_1_1/><child_1_2>Some text</child_1_2></root_node>")
92
- @normalizer.remove_empty_attributes(no_attributes_doc.root.children[1])
93
- expect(EquivalentXml.equivalent?(no_attributes_doc.root, correct_attributes_doc.root)).to be_truthy
94
- end
95
- end
96
-
97
- describe "remove_empty_nodes" do
98
- it "raises an exception given a null input" do
99
- expect { @normalizer.remove_empty_nodes(nil) }.to raise_error(NoMethodError)
100
- end
101
-
102
- it "removes all nodes, given a subtree that contains only empty nodes" do
103
- messy_doc = Nokogiri::XML("<root><child1>TCT</child1><child11/><child12><child21/><child22/><child23></child23></child12></root>")
104
- clean_doc = Nokogiri::XML("<root><child1>TCT</child1><child11/></root>")
105
- @normalizer.remove_empty_nodes(messy_doc.root.children[2])
106
- expect(EquivalentXml.equivalent?(messy_doc, clean_doc)).to be_truthy
107
- end
108
-
109
- it "removes empty nodes from a subtree that contains a mix of empty and non-empty nodes" do
110
- mixed_doc = Nokogiri::XML("<root><child_1 sf=\"one\"> TCT </child1><child1_1>,DOS<child1_2><child2_1/><child2_2/><child2_3 bf=\"\"></child2_3></child1_2></child1_1></root>")
111
- clean_doc = Nokogiri::XML("<root><child_1 sf=\"one\"> TCT </child1><child1_1>,DOS</child1_1></root>")
112
- @normalizer.remove_empty_nodes(mixed_doc.root)
113
- expect(EquivalentXml.equivalent?(mixed_doc, clean_doc)).to be_truthy
114
- end
115
- end
116
-
117
- describe "clean_linefeeds" do
118
- it "returns the given XML unchanged if there are no linefeed characters" do
119
- start_doc = Nokogiri::XML("<tableOfContents> Some text that does not have any linefeed chars. </tableOfContents>")
120
- final_doc = Nokogiri::XML("<tableOfContents> Some text that does not have any linefeed chars. </tableOfContents>")
121
- @normalizer.clean_linefeeds(start_doc.root)
122
- expect(EquivalentXml.equivalent?(start_doc, final_doc)).to be_truthy
123
- end
124
-
125
- it "returns the given XML node unchanged if it is not in the set { <tableOfContents>, <abstract>, <note> }" do
126
- start_doc = Nokogiri::XML("<root> Some text that does not have any linefeed chars. </root>")
127
- final_doc = Nokogiri::XML("<root> Some text that does not have any linefeed chars. </root>")
128
- @normalizer.clean_linefeeds(start_doc.root)
129
- expect(EquivalentXml.equivalent?(start_doc, final_doc)).to be_truthy
130
- end
131
-
132
- it "replaces <br> by &#10; and <p> by &#10;&#10;" do
133
- start_doc = Nokogiri::XML("<note> How to present text: <br>Four chances.<p>Executive orders from tall managerial summits.</p> <br/>Exonerate...</note>")
134
- final_doc = Nokogiri::XML("<note> How to present text: &#10;Four chances.&#10;&#10;Executive orders from tall managerial summits. &#10;Exonerate...</note>")
135
- @normalizer.clean_linefeeds(start_doc.root.xpath(Normalizer::LINEFEED_XPATH))
136
- expect(EquivalentXml.equivalent?(start_doc, final_doc)).to be_truthy
137
- end
138
-
139
- it "replaces both \n and \r by &#10; and replaces \r\n by &#10;" do
140
- start_doc = Nokogiri::XML("<abstract> Newsworthy dog:\n Bark. Adium \r\n Aquamacs \r\n Firefox \r Terminal</abstract>")
141
- final_doc = Nokogiri::XML("<abstract> Newsworthy dog:&#10; Bark. Adium &#10; Aquamacs &#10; Firefox &#10; Terminal</abstract>")
142
- @normalizer.clean_linefeeds(start_doc.root)
143
- expect(EquivalentXml.equivalent?(start_doc, final_doc)).to be_truthy
144
- end
145
-
146
- it "raises an exception given a null input" do
147
- expect { @normalizer.clean_linefeeds(nil) }.to raise_error(NoMethodError)
148
- end
149
- end
150
- end