xml_contacts_extractor 0.0.2 → 0.0.3

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # XmlContactsExtractor
2
2
 
3
- TODO: Write a gem description
3
+ Extracts contact information from XML.
4
4
 
5
5
  ## Installation
6
6
 
@@ -1,3 +1,3 @@
1
1
  module XmlContactsExtractor
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
@@ -1,11 +1,13 @@
1
1
  require "xml_contacts_extractor/version"
2
2
  require "nokogiri"
3
3
 
4
- module XmlContactsExtractor
4
+ class XmlContactsExtractor
5
5
 
6
- def self.extract_contacts(xml)
6
+ attr_accessor :first_name, :fname, :last_name, :lname, :email, :phone, :address, :city, :state, :zip, :ip_address
7
+
8
+ def extract_contacts(xml)
7
9
 
8
- # get possible matches
10
+ # Different possible variations
9
11
  first_name_choices = ["first_name", "fname", "f_name", "firstname", "FirstName", "First", "name"]
10
12
  last_name_choices = ["lastname", "last_name", "lname", "l_name", "LastName", "Last"]
11
13
  email_choices = ["email", "email_address", "email_addy", "EmailAddress"]
@@ -18,85 +20,69 @@ module XmlContactsExtractor
18
20
 
19
21
  xml = xml.gsub(/\n|\\n|\\/, '')
20
22
  xml_doc = Nokogiri::XML(xml)
21
- p xml_doc
22
23
 
24
+ # First name
23
25
  first_name_choices.each do |f|
24
- #@first_name = xml_doc.at_xpath("//#{f}").content.to_s if xml_doc.at_xpath("//#{f}")
25
26
  if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]") && @first_name.blank?
26
- @first_name = xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]").content.to_s
27
+ @first_name = @fname = xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]").content.to_s
27
28
  end
28
29
  end
29
30
 
31
+ # Last name
30
32
  last_name_choices.each do |f|
31
- #@last_name = xml_doc.at_xpath("//#{f}").content.to_s if xml_doc.at_xpath("//#{f}")
32
- if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]")
33
- @last_name = xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]").content.to_s
33
+ if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]") && @last_name.blank?
34
+ @last_name = @lname = xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]").content.to_s
34
35
  end
35
36
  end
36
37
 
38
+ # Email
37
39
  email_choices.each do |f|
38
- #@email = xml_doc.at_xpath("//#{f}").content.to_s if xml_doc.at_xpath("//#{f}")
39
- if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]")
40
+ if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]") && @email.blank?
40
41
  @email = xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]").content.to_s
41
42
  end
42
43
  end
43
44
 
45
+ # Phone
44
46
  phone_choices.each do |f|
45
- #@phone = xml_doc.at_xpath("//#{f}").content.to_s if xml_doc.at_xpath("//#{f}")
46
- if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]")
47
+ if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]") && @phone.blank?
47
48
  @phone = xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]").content.to_s
48
49
  end
49
50
  end
50
51
 
52
+ # Address
51
53
  address_choices.each do |f|
52
- #@address = xml_doc.at_xpath("//#{f}").content.to_s if xml_doc.at_xpath("//#{f}")
53
- if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]")
54
+ if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]") && @address.blank?
54
55
  @address = xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]").content.to_s
55
56
  end
56
57
  end
57
58
 
59
+ # City
58
60
  city_choices.each do |f|
59
- #@city = xml_doc.at_xpath("//#{f}").content.to_s if xml_doc.at_xpath("//#{f}")
60
- if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]")
61
+ if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]") && @city.blank?
61
62
  @city = xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]").content.to_s
62
63
  end
63
64
  end
64
65
 
66
+ # State
65
67
  state_choices.each do |f|
66
- #@state = xml_doc.at_xpath("//#{f}").content.to_s if xml_doc.at_xpath("//#{f}")
67
- if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]")
68
+ if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]") && @state.blank?
68
69
  @state = xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]").content.to_s
69
70
  end
70
71
  end
71
72
 
73
+ # Zip
72
74
  zip_choices.each do |f|
73
- #@zip = xml_doc.at_xpath("//#{f}").content.to_s if xml_doc.at_xpath("//#{f}")
74
- if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]")
75
+ if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]") && @zip.blank?
75
76
  @zip = xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]").content.to_s
76
77
  end
77
78
  end
78
79
 
80
+ # IP Address
79
81
  ip_address_choices.each do |f|
80
- #@ip_address = xml_doc.at_xpath("//#{f}").content.to_s if xml_doc.at_xpath("//#{f}")
81
- if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]")
82
+ if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]") && @ip_address.blank?
82
83
  @ip_address = xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]").content.to_s
83
84
  end
84
85
  end
85
-
86
- matched_fields = {} # attempt to find and grab required fields to be stored in physical columns
87
- matched_fields = {
88
- :first_name => @first_name,
89
- :last_name => @last_name,
90
- :address => @address,
91
- :phone => @phone,
92
- :email => @email,
93
- :ip_address => @ip_address,
94
- :city => @city,
95
- :state => @state,
96
- :zip => @zip
97
- }
98
- matched_fields || {}
99
- p matched_fields
100
86
  end
101
87
 
102
- end
88
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xml_contacts_extractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: