xml_contacts_extractor 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # XmlContactsExtractor
2
2
 
3
- TODO: Write a gem description
3
+ Extracts contact information from XML.
4
4
 
5
5
  ## Installation
6
6
 
@@ -1,3 +1,3 @@
1
1
  module XmlContactsExtractor
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
@@ -1,11 +1,13 @@
1
1
  require "xml_contacts_extractor/version"
2
2
  require "nokogiri"
3
3
 
4
- module XmlContactsExtractor
4
+ class XmlContactsExtractor
5
5
 
6
- def self.extract_contacts(xml)
6
+ attr_accessor :first_name, :fname, :last_name, :lname, :email, :phone, :address, :city, :state, :zip, :ip_address
7
+
8
+ def extract_contacts(xml)
7
9
 
8
- # get possible matches
10
+ # Different possible variations
9
11
  first_name_choices = ["first_name", "fname", "f_name", "firstname", "FirstName", "First", "name"]
10
12
  last_name_choices = ["lastname", "last_name", "lname", "l_name", "LastName", "Last"]
11
13
  email_choices = ["email", "email_address", "email_addy", "EmailAddress"]
@@ -18,85 +20,69 @@ module XmlContactsExtractor
18
20
 
19
21
  xml = xml.gsub(/\n|\\n|\\/, '')
20
22
  xml_doc = Nokogiri::XML(xml)
21
- p xml_doc
22
23
 
24
+ # First name
23
25
  first_name_choices.each do |f|
24
- #@first_name = xml_doc.at_xpath("//#{f}").content.to_s if xml_doc.at_xpath("//#{f}")
25
26
  if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]") && @first_name.blank?
26
- @first_name = xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]").content.to_s
27
+ @first_name = @fname = xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]").content.to_s
27
28
  end
28
29
  end
29
30
 
31
+ # Last name
30
32
  last_name_choices.each do |f|
31
- #@last_name = xml_doc.at_xpath("//#{f}").content.to_s if xml_doc.at_xpath("//#{f}")
32
- if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]")
33
- @last_name = xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]").content.to_s
33
+ if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]") && @last_name.blank?
34
+ @last_name = @lname = xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]").content.to_s
34
35
  end
35
36
  end
36
37
 
38
+ # Email
37
39
  email_choices.each do |f|
38
- #@email = xml_doc.at_xpath("//#{f}").content.to_s if xml_doc.at_xpath("//#{f}")
39
- if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]")
40
+ if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]") && @email.blank?
40
41
  @email = xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]").content.to_s
41
42
  end
42
43
  end
43
44
 
45
+ # Phone
44
46
  phone_choices.each do |f|
45
- #@phone = xml_doc.at_xpath("//#{f}").content.to_s if xml_doc.at_xpath("//#{f}")
46
- if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]")
47
+ if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]") && @phone.blank?
47
48
  @phone = xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]").content.to_s
48
49
  end
49
50
  end
50
51
 
52
+ # Address
51
53
  address_choices.each do |f|
52
- #@address = xml_doc.at_xpath("//#{f}").content.to_s if xml_doc.at_xpath("//#{f}")
53
- if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]")
54
+ if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]") && @address.blank?
54
55
  @address = xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]").content.to_s
55
56
  end
56
57
  end
57
58
 
59
+ # City
58
60
  city_choices.each do |f|
59
- #@city = xml_doc.at_xpath("//#{f}").content.to_s if xml_doc.at_xpath("//#{f}")
60
- if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]")
61
+ if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]") && @city.blank?
61
62
  @city = xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]").content.to_s
62
63
  end
63
64
  end
64
65
 
66
+ # State
65
67
  state_choices.each do |f|
66
- #@state = xml_doc.at_xpath("//#{f}").content.to_s if xml_doc.at_xpath("//#{f}")
67
- if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]")
68
+ if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]") && @state.blank?
68
69
  @state = xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]").content.to_s
69
70
  end
70
71
  end
71
72
 
73
+ # Zip
72
74
  zip_choices.each do |f|
73
- #@zip = xml_doc.at_xpath("//#{f}").content.to_s if xml_doc.at_xpath("//#{f}")
74
- if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]")
75
+ if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]") && @zip.blank?
75
76
  @zip = xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]").content.to_s
76
77
  end
77
78
  end
78
79
 
80
+ # IP Address
79
81
  ip_address_choices.each do |f|
80
- #@ip_address = xml_doc.at_xpath("//#{f}").content.to_s if xml_doc.at_xpath("//#{f}")
81
- if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]")
82
+ if xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]") && @ip_address.blank?
82
83
  @ip_address = xml_doc.at_xpath("//*[contains(translate(name(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'), '#{f}')]").content.to_s
83
84
  end
84
85
  end
85
-
86
- matched_fields = {} # attempt to find and grab required fields to be stored in physical columns
87
- matched_fields = {
88
- :first_name => @first_name,
89
- :last_name => @last_name,
90
- :address => @address,
91
- :phone => @phone,
92
- :email => @email,
93
- :ip_address => @ip_address,
94
- :city => @city,
95
- :state => @state,
96
- :zip => @zip
97
- }
98
- matched_fields || {}
99
- p matched_fields
100
86
  end
101
87
 
102
- end
88
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xml_contacts_extractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: