xml_contacts_extractor 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +1 -1
- data/lib/xml_contacts_extractor/version.rb +1 -1
- data/lib/xml_contacts_extractor.rb +25 -39
- metadata +1 -1
data/README.md
CHANGED
data/lib/xml_contacts_extractor.rb
CHANGED
@@ -1,11 +1,13 @@
|
|
1
1
|
require "xml_contacts_extractor/version"
|
2
2
|
require "nokogiri"
|
3
3
|
|
4
|
-
|
4
|
+
class XmlContactsExtractor
|
5
5
|
|
6
|
-
|
6
|
+
attr_accessor :first_name, :fname, :last_name, :lname, :email, :phone, :address, :city, :state, :zip, :ip_address
|
7
|
+
|
8
|
+
def extract_contacts(xml)
|
7
9
|
|
8
|
-
#
|
10
|
+
# Different possible variations
|
9
11
|
first_name_choices = ["first_name", "fname", "f_name", "firstname", "FirstName", "First", "name"]
|
10
12
|
last_name_choices = ["lastname", "last_name", "lname", "l_name", "LastName", "Last"]
|
11
13
|
email_choices = ["email", "email_address", "email_addy", "EmailAddress"]
|
@@ -18,85 +20,69 @@ module XmlContactsExtractor
|
|
18
20
|
|
19
21
|
xml = xml.gsub(/\n|\\n|\\/, '')
|
20
22
|
xml_doc = Nokogiri::XML(xml)
|
21
|
-
p xml_doc
|
22
23
|
|
24
|
+
# First name
|
23
25
|
first_name_choices.each do |f|
|
24
|
-
#@first_name = xml_doc.at_xpath("//#{f}").content.to_s if xml_doc.at_xpath("//#{f}")
|
25
26
|
if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]") && @first_name.blank?
|
26
|
-
@first_name = xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]").content.to_s
|
27
|
+
@first_name = @fname = xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]").content.to_s
|
27
28
|
end
|
28
29
|
end
|
29
30
|
|
31
|
+
# Last name
|
30
32
|
last_name_choices.each do |f|
|
31
|
-
|
32
|
-
|
33
|
-
@last_name = xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]").content.to_s
|
33
|
+
if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]") && @last_name.blank?
|
34
|
+
@last_name = @lname = xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]").content.to_s
|
34
35
|
end
|
35
36
|
end
|
36
37
|
|
38
|
+
# Email
|
37
39
|
email_choices.each do |f|
|
38
|
-
|
39
|
-
if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]")
|
40
|
+
if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]") && @email.blank?
|
40
41
|
@email = xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]").content.to_s
|
41
42
|
end
|
42
43
|
end
|
43
44
|
|
45
|
+
# Phone
|
44
46
|
phone_choices.each do |f|
|
45
|
-
|
46
|
-
if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]")
|
47
|
+
if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]") && @phone.blank?
|
47
48
|
@phone = xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]").content.to_s
|
48
49
|
end
|
49
50
|
end
|
50
51
|
|
52
|
+
# Address
|
51
53
|
address_choices.each do |f|
|
52
|
-
|
53
|
-
if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]")
|
54
|
+
if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]") && @address.blank?
|
54
55
|
@address = xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]").content.to_s
|
55
56
|
end
|
56
57
|
end
|
57
58
|
|
59
|
+
# City
|
58
60
|
city_choices.each do |f|
|
59
|
-
|
60
|
-
if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]")
|
61
|
+
if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]") && @city.blank?
|
61
62
|
@city = xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]").content.to_s
|
62
63
|
end
|
63
64
|
end
|
64
65
|
|
66
|
+
# State
|
65
67
|
state_choices.each do |f|
|
66
|
-
|
67
|
-
if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]")
|
68
|
+
if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]") && @state.blank?
|
68
69
|
@state = xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]").content.to_s
|
69
70
|
end
|
70
71
|
end
|
71
72
|
|
73
|
+
# Zip
|
72
74
|
zip_choices.each do |f|
|
73
|
-
|
74
|
-
if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]")
|
75
|
+
if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]") && @zip.blank?
|
75
76
|
@zip = xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]").content.to_s
|
76
77
|
end
|
77
78
|
end
|
78
79
|
|
80
|
+
# IP Address
|
79
81
|
ip_address_choices.each do |f|
|
80
|
-
|
81
|
-
if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]")
|
82
|
+
if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]") && @ip_address.blank?
|
82
83
|
@ip_address = xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]").content.to_s
|
83
84
|
end
|
84
85
|
end
|
85
|
-
|
86
|
-
matched_fields = {} # attempt to find and grab required fields to be stored in physical columns
|
87
|
-
matched_fields = {
|
88
|
-
:first_name => @first_name,
|
89
|
-
:last_name => @last_name,
|
90
|
-
:address => @address,
|
91
|
-
:phone => @phone,
|
92
|
-
:email => @email,
|
93
|
-
:ip_address => @ip_address,
|
94
|
-
:city => @city,
|
95
|
-
:state => @state,
|
96
|
-
:zip => @zip
|
97
|
-
}
|
98
|
-
matched_fields || {}
|
99
|
-
p matched_fields
|
100
86
|
end
|
101
87
|
|
102
|
-
end
|
88
|
+
end
|