modsulator 1.0.5 → 1.0.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,150 +0,0 @@
1
- RSpec.describe Normalizer do
2
- before :all do
3
- # Create common variables shared across tests
4
- @fixtures_dir = File.expand_path("../../fixtures", __FILE__)
5
- @normalizer = Normalizer.new
6
- end
7
-
8
-
9
- describe "clean_text" do
10
- it "formats text blocks correctly" do
11
- bad_string = " This is some text with more
12
-
13
-
14
- than one
15
-
16
-
17
- problem
18
-
19
- inside
20
-
21
- "
22
- expect(@normalizer.clean_text(bad_string)).to eq("This is some text with more than one problem inside")
23
- end
24
-
25
- it "returns nil given a nil input" do
26
- expect(@normalizer.clean_text(nil)).to eq(nil)
27
- end
28
-
29
- it "returns nil given an empty string" do
30
- expect(@normalizer.clean_text("")).to eq(nil)
31
- end
32
-
33
- it "returns an empty string given an input that is only whitespace" do
34
- expect(@normalizer.clean_text(" ")).to eq("")
35
- end
36
- end
37
-
38
-
39
- describe "exceptional?" do
40
- it "returns false for a nil input" do
41
- expect(@normalizer.exceptional?(nil)).to be_falsey
42
- end
43
-
44
- it "returns false for an element that does not have any attributes" do
45
- no_attributes_doc = Nokogiri::XML("<root_node><typeOfResource>randomtext</typeOfResource></root_node>")
46
- expect(@normalizer.exceptional?(no_attributes_doc.root.children[0])).to be_falsey
47
- end
48
-
49
- it "returns true for an element that matches the condition" do
50
- exceptional_doc = Nokogiri::XML("<root_node><typeOfResource collection=\"yes\">randomtext</typeOfResource></root_node>")
51
- expect(@normalizer.exceptional?(exceptional_doc.root.children[0])).to be_truthy
52
- end
53
- end
54
-
55
-
56
- describe "trim_text" do
57
- it "raises an exception given a nil input" do
58
- # binding.pry
59
- expect { @normalizer.trim_text(nil) }.to raise_error(NoMethodError)
60
- end
61
-
62
- it "returns the tree unchanged, given a tree that contains no text" do
63
- no_text_doc = Nokogiri::XML("<root_node><child1 att=\"val\"><child_2></child_2><child3/></child1><child4><child5 att=\"abc\"><child_6/></child5></child4></root_node>")
64
- original = Nokogiri::XML("<root_node><child1 att=\"val\"><child_2></child_2><child3/></child1><child4><child5 att=\"abc\"><child_6/></child5></child4></root_node>")
65
- @normalizer.trim_text(no_text_doc.root)
66
- expect(EquivalentXml.equivalent?(no_text_doc.root, original.root)).to be_truthy
67
- end
68
-
69
- it "correctly removes whitespace" do
70
- no_text_doc = Nokogiri::XML("<root_node><child1 att=\"val\"><child_2> TEXTING </child_2><child3/></child1><child4><child5 att=\"abc\"><child_6/></child5></child4></root_node>")
71
- correct_doc = Nokogiri::XML("<root_node><child1 att=\"val\"><child_2>TEXTING</child_2><child3/></child1><child4><child5 att=\"abc\"><child_6/></child5></child4></root_node>")
72
- @normalizer.trim_text(no_text_doc.root)
73
- expect(no_text_doc.to_s).to eq(correct_doc.to_s)
74
- end
75
- end
76
-
77
- describe "remove_empty_attributes" do
78
- it "raises an error given a null argument" do
79
- expect { @normalizer.remove_empty_attributes(nil) }.to raise_error(NoMethodError)
80
- end
81
-
82
- it "removes all empty attributes for a single node" do
83
- no_attributes_doc = Nokogiri::XML("<root_node><child_1_1/><child_1_2 at1=\"forward\" at2=\"\">Some text</child_1_2><child_1_3 bt1=\" \" bt2=\"forgery\" bt3=\"\"></child_1_3></root_node>")
84
- correct_attributes_doc = Nokogiri::XML("<root_node><child_1_1/><child_1_2 at1=\"forward\" at2=\"\">Some text</child_1_2><child_1_3 bt2=\"forgery\" ></child_1_3></root_node>")
85
- @normalizer.remove_empty_attributes(no_attributes_doc.root.children[2])
86
- expect(EquivalentXml.equivalent?(no_attributes_doc.root, correct_attributes_doc.root)).to be_truthy
87
- end
88
-
89
- it "removes all attributes for a node that has only empty attributes" do
90
- no_attributes_doc = Nokogiri::XML("<root_node><child_1_1/><child_1_2 at1=\"\" at2=\"\" bfk=\" \" r2d2=\" \">Some text</child_1_2></root_node>")
91
- correct_attributes_doc = Nokogiri::XML("<root_node><child_1_1/><child_1_2>Some text</child_1_2></root_node>")
92
- @normalizer.remove_empty_attributes(no_attributes_doc.root.children[1])
93
- expect(EquivalentXml.equivalent?(no_attributes_doc.root, correct_attributes_doc.root)).to be_truthy
94
- end
95
- end
96
-
97
- describe "remove_empty_nodes" do
98
- it "raises an exception given a null input" do
99
- expect { @normalizer.remove_empty_nodes(nil) }.to raise_error(NoMethodError)
100
- end
101
-
102
- it "removes all nodes, given a subtree that contains only empty nodes" do
103
- messy_doc = Nokogiri::XML("<root><child1>TCT</child1><child11/><child12><child21/><child22/><child23></child23></child12></root>")
104
- clean_doc = Nokogiri::XML("<root><child1>TCT</child1><child11/></root>")
105
- @normalizer.remove_empty_nodes(messy_doc.root.children[2])
106
- expect(EquivalentXml.equivalent?(messy_doc, clean_doc)).to be_truthy
107
- end
108
-
109
- it "removes empty nodes from a subtree that contains a mix of empty and non-empty nodes" do
110
- mixed_doc = Nokogiri::XML("<root><child_1 sf=\"one\"> TCT </child1><child1_1>,DOS<child1_2><child2_1/><child2_2/><child2_3 bf=\"\"></child2_3></child1_2></child1_1></root>")
111
- clean_doc = Nokogiri::XML("<root><child_1 sf=\"one\"> TCT </child1><child1_1>,DOS</child1_1></root>")
112
- @normalizer.remove_empty_nodes(mixed_doc.root)
113
- expect(EquivalentXml.equivalent?(mixed_doc, clean_doc)).to be_truthy
114
- end
115
- end
116
-
117
- describe "clean_linefeeds" do
118
- it "returns the given XML unchanged if there are no linefeed characters" do
119
- start_doc = Nokogiri::XML("<tableOfContents> Some text that does not have any linefeed chars. </tableOfContents>")
120
- final_doc = Nokogiri::XML("<tableOfContents> Some text that does not have any linefeed chars. </tableOfContents>")
121
- @normalizer.clean_linefeeds(start_doc.root)
122
- expect(EquivalentXml.equivalent?(start_doc, final_doc)).to be_truthy
123
- end
124
-
125
- it "returns the given XML node unchanged if it is not in the set { <tableOfContents>, <abstract>, <note> }" do
126
- start_doc = Nokogiri::XML("<root> Some text that does not have any linefeed chars. </root>")
127
- final_doc = Nokogiri::XML("<root> Some text that does not have any linefeed chars. </root>")
128
- @normalizer.clean_linefeeds(start_doc.root)
129
- expect(EquivalentXml.equivalent?(start_doc, final_doc)).to be_truthy
130
- end
131
-
132
- it "replaces <br> by &#10; and <p> by &#10;&#10;" do
133
- start_doc = Nokogiri::XML("<note> How to present text: <br>Four chances.<p>Executive orders from tall managerial summits.</p> <br/>Exonerate...</note>")
134
- final_doc = Nokogiri::XML("<note> How to present text: &#10;Four chances.&#10;&#10;Executive orders from tall managerial summits. &#10;Exonerate...</note>")
135
- @normalizer.clean_linefeeds(start_doc.root.xpath(Normalizer::LINEFEED_XPATH))
136
- expect(EquivalentXml.equivalent?(start_doc, final_doc)).to be_truthy
137
- end
138
-
139
- it "replaces both \n and \r by &#10; and replaces \r\n by &#10;" do
140
- start_doc = Nokogiri::XML("<abstract> Newsworthy dog:\n Bark. Adium \r\n Aquamacs \r\n Firefox \r Terminal</abstract>")
141
- final_doc = Nokogiri::XML("<abstract> Newsworthy dog:&#10; Bark. Adium &#10; Aquamacs &#10; Firefox &#10; Terminal</abstract>")
142
- @normalizer.clean_linefeeds(start_doc.root)
143
- expect(EquivalentXml.equivalent?(start_doc, final_doc)).to be_truthy
144
- end
145
-
146
- it "raises an exception given a null input" do
147
- expect { @normalizer.clean_linefeeds(nil) }.to raise_error(NoMethodError)
148
- end
149
- end
150
- end