utf8_sanitizer 0.0.2.pre.rc.01 → 0.0.2.pre.rc.02

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,100 @@
1
+ url,act_name,street,city,state,zip,phone
2
+ stanleykaufman.net,Stanley Chevrolet Kaufman,825 E Fair St,Kaufman,TX,75142,(888) 457-4391
3
+ leepartyka.com,Lee Partyka Chevrolet Mazda Isuzu Truck,200 Skiff St,Hamden,CT,6518,(203) 288-7761
4
+ burienhonda.com,"Honda of Burien
5
+ cortlandchryslerdodgejeep.com,Cortland Chrysler Dodge Jeep RAM,3878 West Rd,Cortland,NY,13045,(877) 279-3113
6
+ imperialmotors.net,Imperial Motors,4839 Virginia Beach Blvd,Virginia Beach,VA,23462,(757) 490-3651
7
+ liatoyotaofnorthampton.com,"Lia Toyota of Northampton
8
+ nelsonhallchevrolet.com,Nelson Hall Chevrolet,1811 S Frontage Rd,Meridian,MS,39301,(601) 621-4593
9
+ marshallfordco.com,Marshall Ford Co Inc.,14843 MS-16,Philadelphia,MS,39350,(888) 461-7643
10
+ warrentontoyota.com,Warrenton Toyota,6449 Lee Hwy,Warrenton,VA,20187,(540) 878-4100
11
+ toyotacertifiedatcentralcity.com,"Toyota Certified
12
  Central City",4800 Chestnut St,Philadelphia,PA,19139,(888) 379-1155
13
+ rockcitychrysler.com,Rock City Chrysler Jeep Dodge,520 Rock City St,Little Valley,NY,14755,(866) 414-1024
14
+ eddinsford.com,Eddins Ford Inc.,2895 S Seminole Trail,Madison,VA,22727,(888) 348-0661
15
+ lithiasubarufresno.com,Lithia Subaru of Fresno,5499 N Blackstone Ave,Fresno,CA,93710,(888) 453-5359
16
+ nnchevrolet.com,Northern Neck Chevrolet,18175 Kings Hwy,Montross,VA,22520,(804) 368-6547
17
+ brewsterhonda.com,"Lia Honda Brewster NY
18
+ tegelerchevrolet.com,Tegeler Chevrolet,17114 Fordtran Blvd,Industry,TX,78944,(979) 357-2575
19
+ rochesterhillschryslerjeep.net,Rochester Hills Chrysler Jeep Dodge,1301 S Rochester Rd,Rochester,MI,48307,(248) 841-4094
20
+ 1stkia.com,"First Kia
21
+ audijax.com,Audi Jacksonville,7230 Blanding Blvd,Jacksonville,FL,32244,(888) 451-7572
22
+ kalsauto.com,Kal's Auto Sales II,5130 W 8 Mile Rd,Detroit,MI,48234,(313) 891-0000
23
+ carwashcarsinc.com,Car Wash Cars Inc,462 Route 9W,Glenmont,NY,12077,(518) 729-4317
24
+ autonationnissanorangepark.com,AutoNation Nissan Orange Park,7447 Blanding Blvd,Jacksonville,FL,32244,(904) 270-9954
25
+ cfschwartztoyota.com,C. F. Schwartz Toyota,1536 N Dupont Hwy,Dover,DE,19901,(877) 769-6001
26
+ lhmlexus.com,"Directions to Larry H. Miller Lexus Murray near Salt Lake City, UT
27
+ agchevy.com,Arroyo Grande Chevrolet,303 Traffic Way,Arroyo Grande,CA,93420,(805) 202-4636
28
+ billowenscars.com,Bill Owens Auto Sales,705 U.S. 27 SOUTH,Avon Park,FL,33825,(863) 452-5599
29
+ damascusmotors.com,Damascus Motors,26100 Woodfield Rd,Damascus,MD,20872,(301) 253-2151
30
+ dennisdillonchryslerjeepdodge.com,Dennis Dillon Chrysler Jeep Dodge,4025 Cleveland Blvd,Caldwell,ID,83605,(888) 506-8191
31
+ bobhowardhonda.com,Bob Howard Honda,13201 N Kelley Ave,Oklahoma City,OK,73131,(405) 753-8700
32
+ humboldtdodgechryslerjeep.com,Lonnie Cobb's Humboldt Chrysler Dodge Jeep RAM,3301 Eastend Dr,Humboldt,TN,38343,(888) 554-6047
33
+ livingstonvw.com,Livingston Volkswagen,21141 Ventura Blvd,Woodland Hills,CA,91364,(855) 419-9290
34
+ beckmastennorth.com,Beck and Masten North,11300 FM 1960,Houston,TX,77065,(281) 469-5222
35
+ fairbanksnissan.com,Fairbanks Nissan,2610 S Cushman St,Fairbanks,AK,99701,(866) 467-8690
36
+ feyerlincoln.net,Feyer Lincoln Inc.,1677 US-17 BUS,Williamston,NC,27892,(866) 493-1351
37
+ forrestersdodgecity.com,Forrester's Dodge City,204 S Walnut St,Arnold,NE,69120,(888) 463-0311
38
+ victorchevrolet.com,Victor Chevrolet - A Rochester Area Dealer,7200 Pittsford Victor Rd,Victor,NY,14564,(585) 433-2500
39
+ landroverpalmbeach.com,Land Rover Palm Beach,7550 Okeechobee Blvd,West Palm Beach,FL,33411,(561) 209-7000
40
+ goudyhonda.com,Goudy Honda,1400 W Main St,Alhambra,CA,91801,(626) 576-1114
41
+ davis-olaughlin.com,Davis O'Laughlin Buick GMC Cadillac,4580 US-219,Great Valley,NY,14741,(716) 945-5050
42
+ lutherhondaofstcloud.com,Luther St. Cloud Honda,1500 Hwy 23 West,Waite Park,MN,56387,(888) 453-5650
43
+ titanautosales.com,Titan Auto Sales LLC,1037 Central Ave,Albany,NY,12205,(518) 438-0101
44
+ hixsonbmw.com,Hixson BMW Monroe,1201 Louisville Ave,Monroe,LA,71201,(855) 519-8884
45
+ antwerpenautoworld.com,Antwerpen Auto World,9400 Liberty Road,Randallstown,MD,21133,(888) 437-9880
46
+ lakelandcdj.com,Lakeland Chrysler Dodge Jeep,2875 Mall Hill Drive,Lakeland,FL,33810,(888) 420-1648
47
+ blakegreenfieldchevroletbuick.com,Blake Greenfield Chevrolet Buick,150 3rd St NE,Wells,MN,56097,(507) 774-0365
48
+ carltonmb.com,Carlton Motorcars,2446 Laurens Rd,Greenville,SC,29607,(855) 336-9760
49
+ silkohonda.com,Silko Honda,1580 New State Hwy,Raynham,MA,2767,(508) 880-5500
50
+ lahabramotors.com,La Habra Motors,980 E Whittier Blvd,La Habra,CA,90631,(562) 697-2400
51
+ hugoautosales.com,Hugo Auto Sales,2583 Cameron Langston Rd,Grifton,NC,28530,(252) 522-4140
52
+ baxterfordsouth.com,Baxter Ford South,9203 S 145th St,Omaha,NE,68138,(855) 985-0071
53
+ roemotorsny.com,Roe Motors Ltd,662 Montauk Hwy,Shirley,NY,11967,(631) 395-2277
54
+ kingmontclair.com,King Auto Sales,5053 Mission Blvd,Montclair,CA,91763,(909) 627-1211
55
+ douglasinfiniti.com,Douglas Infiniti,430 Morris Ave,Summit,NJ,7901,(908) 522-7300
56
+ plazainfiniti.com,Plaza Infiniti,755 N New Ballas Rd,Creve Coeur,MO,63141,(888) 593-1881
57
+ coughlinmarysvillechrysler.com,Coughlin Chrysler Jeep Dodge RAM,15777 Watkins Rd,Marysville,OH,43040,(888) 378-6128
58
+ dillonlakemtrs.com,Dillon Lake Motors,3350 Maple Ave,Zanesville,OH,43701,(740) 297-7265
59
+ chapmanspeedway.com,Chapman Used Cars on Speedway,6001 E Speedway Blvd,Tucson,AZ,85712,(520) 829-0032
60
+ beckchevroletonline.com,"Beck Chevrolet Co., Inc.",561 Central Park Ave,Yonkers,NY,10704,(914) 595-1463
61
+ infinitimv.com,INFINITI of Mission Viejo,28471 Marguerite Pkwy,Mission Viejo,CA,92692,(855) 410-3445
62
+ hondaofserramonte.com,Honda of Serramonte,485 Serramonte Blvd,Colma,CA,94014,(888) 892-5461
63
+ douglassnissanofwaco.com,Douglass Nissan of Waco,5605 Legend Lake Pkwy,Waco,TX,76712,(877) 302-5270
64
+ saturnofknoxville.com,McManus Auto Sales,6404 Maynardville Pike,Knoxville,TN,37918,(865) 281-2278
65
+ avchevy.com,Antelope Valley Chevrolet,1160 Motor Ln,Lancaster,CA,93534,(661) 952-2300
66
+ skylineford.net,Skyline Ford,2510 Commercial St SE,Salem,OR,97302,(888) 873-7584
67
+ moseshuntingtongm.com,Moses Buick GMC Cadillac,5200 Route 60 East,Huntington,WV,25705,(304) 736-5291
68
+ hatchtoyota.com,2017 Hatch Toyota.,1051 Automall Pkwy,Show Low,AZ,85901,(928) 242-9007
69
+ malouffordinc.com,Malouf Ford,2210 US-1,North Brunswick Township,NJ,8902,(866) 951-8097
70
+ seminolefordok.com,Seminole Ford,2222 N Milt Phillips Ave,Seminole,OK,74868,(888) 903-5643
71
+ flagshipmotorcars.com,Flagship Motorcars of Lynnfield,385 Broadway,Lynnfield,MA,1940,(877) 907-1967
72
+ capitolhyundai.com,Capitol Hyundai Columbia,101 Newland Rd,Columbia,SC,29229,(800) 258-1436
73
+ searsimports.com,"Sears Imported Autos, Inc.",13500 Wayzata Blvd,Minnetonka,MN,55305,(888) 580-8791
74
+ videon.com,Videon Chrysler Dodge Jeep RAM,4951 West Chester Pike,Newtown Square,PA,19073,(610) 356-7000
75
+ towncountrychryslerdodge.com,Town & Country Chrysler Dodge Ram,703 Crim Ave,Belington,WV,26250,(877) 830-9359
76
+ palmharborhonda.com,Courtesy Palm Harbor Honda,31200 US Hwy 19 N,Palm Harbor,FL,34684,(866) 423-6155
77
+ bravolascruces.com,Bravo Chevrolet Cadillac,1601 S Main St,Las Cruces,NM,88005,(575) 527-3800
78
+ ricksautosite.com,Rick's Auto Sales Used Cars,1536 E 5th St,Metropolis,IL,62960,(618) 524-7366
79
+ audiofhuntington.com,Audi of Huntington,363 E Jericho Turnpike,Huntington Station,NY,11746,(866) 455-0268
80
+ discountautocorner.com,Discount Auto,2908 Lincoln Hwy,Langhorne,PA,19047,(215) 757-5170
81
+ audipetoskey.com,Audi Petoskey,825 Charlevoix Ave,Petoskey,MI,49770,(888) 478-1060
82
+ vwoforchardpark.com,Volkswagen of Orchard Park,3524 Southwestern Blvd,Orchard Park,NY,14127,(716) 216-0256
83
+ pietroske.com,Pietroske Chevrolet,4000 Grand Avenue,Manitowoc,WI,54221,(920) 684-0224
84
+ hendrickhyundainorth.com,Hendrick Hyundai North Charleston,8485 Rivers Ave,North Charleston,SC,29406,(866) 542-1320
85
+ johnsgreatcars.com,John's Great Cars,1133 Lancaster Ave,Reading,PA,19607,(610) 603-9100
86
+ beckmotors.net,Beck Motors,305 US Hwy 63,Freeburg,MO,65035,(888) 638-4505
87
+ parksmotorsales.com,Parks Motor Sales Inc.,919 Nashville Hwy,Columbia,TN,38401,(931) 388-5843
88
+ josephbuickgmc.com,Joseph Buick GMC,8700 Colerain Ave,Groesbeck,OH,45251,(513) 741-1000
89
+ centralgmcnorwood.com,Central Buick GMC of Norwood,70 Boston Providence Hwy,Norwood,MA,2062,(781) 473-0908
90
+ cruisinclassicsinc.com,Cruisin Classics Auto Sales,3575 Fisher Rd,Columbus,OH,43228,(614) 276-7355
91
+ everettchryslerjeepdodgeram.com,Everett Chrysler Dodge Jeep Ram,3709 S Thompson St,Springdale,AR,72764,(888) 257-0465
92
+ garavelsubaru.com,Garavel Subaru,10 Tindall Ave,Norwalk,CT,6851,(877) 223-0913
93
+ jansenchevrolet.com,Jansen Chevrolet,7801 IL-161,Germantown,IL,62245,(618) 613-4215
94
+ bramanbmwjupiter.com,Braman BMW of Jupiter,1555 W Indiantown Rd,Jupiter,FL,33458,(561) 609-0144
95
+ mildenbergermotors.com,Mildenberger Motors,1717 N 1st St,Hamilton,MT,59840,(406) 626-3050
96
+ autonationfordarlington.com,AutoNation Ford Arlington,1400 W Interstate 20,Arlington,TX,76017,(817) 200-4503
97
+ rampmotors.com,Ramp Motors,4869 Nesconset Hwy,Port Jefferson Station,NY,11776,(631) 473-1550
98
+ millerchevy.com,Miller & Sons Chevrolet Buick,3107 Green Garden Rd,Aliquippa,PA,15001,(724) 378-0541
99
+ cjsautosales.com,CJS Auto Sales,2509 S Houston Rd,Pasadena,TX,77502,(713) 947-6405
100
+ redwingchev.net,Red Wing Chevrolet Buick Cadillac,2500 US Hwy 61,Red Wing,MN,55066,(651) 388-4777
101
+ sunnyvalevw.com,Sunnyvale Volkswagen,1025 E El Camino Real,Sunnyvale,CA,94087,(877) 785-8252
@@ -0,0 +1,11 @@
1
+ url,act_name,street,city,state,zip,phone
2
+ stanleykaufman.net,Stanley Chevrolet Kaufman,825 E Fair St,Kaufman,TX,75142,(888) 457-4391
3
+ leepartyka.com,Lee Partyka Chevrolet Mazda Isuzu Truck,200 Skiff St,Hamden,CT,6518,(203) 288-7761
4
+ burienhonda.com,"Honda of Burien
5
+ cortlandchryslerdodgejeep.com,Cortland Chrysler Dodge Jeep RAM,3878 West Rd,Cortland,NY,13045,(877) 279-3113
6
+ imperialmotors.net,Imperial Motors,4839 Virginia Beach Blvd,Virginia Beach,VA,23462,(757) 490-3651
7
+ liatoyotaofnorthampton.com,"Lia Toyota of Northampton
8
+ nelsonhallchevrolet.com,Nelson Hall Chevrolet,1811 S Frontage Rd,Meridian,MS,39301,(601) 621-4593
9
+ marshallfordco.com,Marshall Ford Co Inc.,14843 MS-16,Philadelphia,MS,39350,(888) 461-7643
10
+ warrentontoyota.com,Warrenton Toyota,6449 Lee Hwy,Warrenton,VA,20187,(540) 878-4100
11
+ toyotacertifiedatcentralcity.com,"Toyota Certified
12
  Central City",4800 Chestnut St,Philadelphia,PA,19139,(888) 379-1155
@@ -0,0 +1,3 @@
1
+ url,act_name,street,city,state,zip,phone
2
+ stanleykaufman.net.com.gov,Stanley Chevrolet Kaufman,825 E Fair St,Kaufman,TX,75142,(888) 457-4391
3
+ leepartyka,Lee Partyka Chevrolet Mazda Isuzu Truck,200 Skiff St,Hamden,CT,6518,(203) 288-7761
@@ -0,0 +1,31 @@
module Utf8Sanitizer
  # Entry point that assembles the import arguments, falls back to the bundled
  # seed data when the caller supplies no input, and hands everything to
  # Utf8Sanitizer::UTF for validation.
  class Run
    def initialize
      @crm_data = {}
    end

    # Runs the sanitizer.
    #
    # args - Hash of options; the keys this method looks at are:
    #        :data      - rows to validate (passed through to UTF#validate_data)
    #        :file_path - path of a CSV file to validate
    #        :criteria  - passed through untouched
    #
    # When neither :data nor :file_path carries a non-nil value, the seed CSV
    # path from Seed is used, :pollute_seeds is switched on, and (unless the
    # caller gave :criteria) the seed web criteria are filled in.
    #
    # Returns the accumulated Hash: the defaults, the caller's args, and the
    # validation result merged on top when there is one.
    def import(args={})
      @crm_data = { stats: nil, data: nil, file_path: nil, criteria: nil }
      @crm_data.merge!(args)
      # Only keys with non-nil values count as "supplied".
      keys = args.compact.keys

      unless (keys & [:data, :file_path]).any?
        seed = Seed.new
        @crm_data[:file_path] = seed.grab_seed_file_path
        # Alternative seed source: @crm_data[:data] = seed.grab_seed_hashes
        @crm_data[:pollute_seeds] = true
        @crm_data[:criteria] = seed.grab_seed_web_criteria unless keys.include?(:criteria)
      end

      utf_result = Utf8Sanitizer::UTF.new.validate_data(@crm_data)
      # validate_data returns nil when there was nothing usable to validate
      # (e.g. an empty :data array); merging nil would raise TypeError.
      @crm_data.merge!(utf_result) if utf_result
      @crm_data
    end
  end
end
@@ -0,0 +1,97 @@
require 'csv'

module Utf8Sanitizer
  # Supplies sample ("seed") inputs for exercising the sanitizer: a bundled CSV
  # path, a handful of row hashes, URL-matching criteria, and a helper that
  # deliberately corrupts a line of text.
  class Seed
    # args is accepted for interface compatibility; no option is read.
    def initialize(args={})
    end

    # Returns a corrupted copy of +text+: two randomly chosen fragments from
    # the list below (several of them invalid UTF-8 bytes), joined by "_", are
    # inserted at the middle character index, and "\r\n" is appended.
    #
    # The caller's string is left untouched; the result is a new String.
    def pollute_seeds(text)
      list = ['h∑', 'lÔ', "\x92", "\x98", "\x99", "\xC0", "\xC1", "\xC2", "\xCC", "\xDD", "\xE5", "\xF8"]
      polluted = text.dup
      polluted.insert(polluted.length / 2, "#{list.sample}_#{list.sample}")
      polluted << "\r\n"
      polluted
    end

    # Returns the absolute path of the bundled seed CSV.
    #
    # The path is resolved against this file's directory rather than the
    # process working directory, so it resolves wherever the gem is installed.
    # Other bundled files in the same csv/ directory: seeds_clean.csv,
    # seeds_dirty.csv, seeds_mega.csv, seeds_mini.csv, seeds_mini_10.csv.
    def grab_seed_file_path
      File.expand_path('csv/seeds_mini_2_bug.csv', __dir__)
    end

    # Returns sample row hashes in the shape UTF#validate_data accepts as :data.
    def grab_seed_hashes
      [
        { row_id: 1,
          url: 'stanleykaufman.com',
          act_name: 'Stanley Chevrolet Kaufman',
          street: '825 E Fair St',
          city: 'Kaufman',
          state: 'TX',
          zip: '75142',
          phone: '(888) 457-4391' },
        { row_id: 2,
          url: 'leepartyka',
          act_name: 'Lee Partyka Chevrolet Mazda Isuzu Truck',
          street: '200 Skiff St',
          city: 'Hamden',
          state: 'CT',
          zip: '6518',
          phone: '(203) 288-7761' },
        { row_id: 3,
          url: 'burienhonda.fake.not.net.com',
          act_name: 'Honda of Burien 15026 1st Avenue South, Burien, WA 98148',
          street: '15026 1st Avenue South',
          city: 'Burien',
          state: 'WA',
          zip: '98148',
          phone: '(206) 246-9700' },
        { row_id: 4,
          url: 'cortlandchryslerdodgejeep.com',
          act_name: 'Cortland Chrysler Dodge Jeep RAM',
          street: '3878 West Rd',
          city: 'Cortland',
          state: 'NY',
          zip: '13045',
          phone: '(877) 279-3113' },
        { row_id: 5,
          url: 'imperialmotors.net',
          act_name: 'Imperial Motors',
          street: '4839 Virginia Beach Blvd',
          city: 'Virginia Beach',
          state: 'VA',
          zip: '23462',
          phone: '(757) 490-3651' }
      ]
    end

    # Returns a Hash of word lists keyed :pos_urls, :neg_urls, :neg_links,
    # :neg_hrefs and :neg_exts. :neg_links and :neg_hrefs are currently empty
    # arrays; their candidate word lists are kept below, commented out.
    def grab_seed_web_criteria
      neg_urls = %w[approv avis budget collis eat enterprise facebook financ food google gourmet hertz hotel hyatt insur invest loan lube mobility motel motorola parts quick rent repair restaur rv ryder service softwar travel twitter webhost yellowpages yelp youtube]

      pos_urls = ['acura', 'alfa romeo', 'aston martin', 'audi', 'bmw', 'bentley', 'bugatti', 'buick', 'cdjr', 'cadillac', 'chevrolet', 'chrysler', 'dodge', 'ferrari', 'fiat', 'ford', 'gmc', 'group', 'honda', 'hummer', 'hyundai', 'infiniti', 'isuzu', 'jaguar', 'jeep', 'kia', 'lamborghini', 'lexus', 'lincoln', 'lotus', 'mini', 'maserati', 'mazda', 'mclaren', 'mercedes-benz', 'mitsubishi', 'nissan', 'porsche', 'ram', 'rolls-royce', 'saab', 'scion', 'smart', 'subaru', 'suzuki', 'toyota', 'volkswagen', 'volvo']

      # neg_links = %w(: .biz .co .edu .gov .jpg .net // afri anounc book business buy bye call cash cheap click collis cont distrib download drop event face feature feed financ find fleet form gas generat graphic hello home hospi hour hours http info insta inventory item join login mail mailto mobile movie museu music news none offer part phone policy priva pump rate regist review schedul school service shop site test ticket tire tv twitter watch www yelp youth)
      neg_links = []

      # neg_hrefs = %w(? .com .jpg @ * afri after anounc apply approved blog book business buy call care career cash charit cheap check click collis commerc cont contrib deal distrib download employ event face feature feed financ find fleet form gas generat golf here holiday hospi hour info insta inventory join later light login mail mobile movie museu music news none now oil part pay phone policy priva pump quick quote rate regist review saving schedul service shop sign site speci ticket tire today transla travel truck tv twitter watch youth)
      neg_hrefs = []

      neg_exts = %w[au ca edu es gov in ru uk us]

      oa_args = { pos_urls: pos_urls, neg_urls: neg_urls, neg_links: neg_links, neg_hrefs: neg_hrefs, neg_exts: neg_exts }
      # compact only drops nil values, so the empty lists above stay in the result.
      oa_args.compact
    end
  end
end
@@ -0,0 +1,170 @@
# frozen_string_literal: false

require 'csv'

module Utf8Sanitizer
  # Validates rows coming from a CSV file or from an array of hashes: invalid
  # and non-ASCII characters are removed, whitespace runs are collapsed, and
  # each row is reported together with what had to be changed.
  class UTF
    # args is accepted for interface compatibility; no option is read.
    def initialize(args={})
      reset_state
    end

    #################### * VALIDATE DATA * ####################
    # args - Hash; only :file_path, :data and :pollute_seeds are read, and
    #        nil values are ignored.
    #
    # Returns the Hash built by compile_results ({ stats: ..., data: ... }),
    # or nil when there was nothing to validate. When both :file_path and
    # :data are given, the :data result wins if it produced one.
    def validate_data(args={})
      args = args.slice(:file_path, :data, :pollute_seeds).compact

      # :pollute_seeds is optional; clear any seed left over from an earlier
      # call so a reused instance does not keep corrupting its input.
      @seed = args[:pollute_seeds] ? Seed.new : nil
      file_path = args[:file_path]
      data = args[:data]

      utf_result = nil
      utf_result = validate_csv(file_path) if file_path
      utf_result = (validate_hashes(data) || utf_result) if data
      utf_result
    end

    #################### * COMPILE RESULTS * ####################
    # Builds the result Hash from the rows collected so far, then resets the
    # collectors so the instance can be reused.
    def compile_results
      utf_status = @valid_rows.map { |hsh| hsh[:utf_status] }
      mapped_details = utf_status.map { |str| str.split(', ') }.flatten.compact
      groups = make_groups_from_array(mapped_details)
      wchar = groups['wchar']
      perfect = groups['perfect']

      header_row_count = @headers.any? ? 1 : 0
      stats = { total_rows: @row_id, header_row: header_row_count, valid_rows: @valid_rows.count, error_rows: @error_rows.count, defective_rows: @defective_rows.count, perfect_rows: perfect, encoded_rows: @encoded_rows.count, wchar_rows: wchar }
      data = { valid_data: @valid_rows, encoded_data: @encoded_rows, defective_data: @defective_rows, error_data: @error_rows }
      utf_result = { stats: stats, data: data }
      reset_state
      utf_result
    end

    #################### * VALIDATE CSV * ####################
    # Reads +file_path+ one physical line at a time. The first parsed line
    # becomes the headers; every later line becomes a row hash. A line that
    # raises (for example malformed CSV) is recorded in the error rows and
    # processing continues with the next line.
    #
    # Returns the compiled result, or nil when +file_path+ is blank.
    def validate_csv(file_path)
      return if blank_value?(file_path)
      # File.foreach closes the file when the block finishes.
      File.foreach(file_path) do |file_line|
        begin
          validated_line = utf_filter(check_utf(file_line))
          @row_id += 1
          if validated_line
            CSV.parse(validated_line) do |row|
              if @headers.empty?
                @headers = row
              else
                @data_hash.merge!(row_to_hsh(row))
                @valid_rows << @data_hash
              end
            end
          end
        rescue StandardError => error
          @error_rows << { row_id: @row_id, text: error.message }
        end
      end
      compile_results
    end

    #################### * VALIDATE HASHES * ####################
    # Validates an Array of row hashes. The keys of the first hash become the
    # headers. An exception stops processing of the remaining hashes and is
    # recorded in the error rows.
    #
    # Returns the compiled result, or nil when +orig_hashes+ is blank.
    def validate_hashes(orig_hashes)
      return if blank_value?(orig_hashes)
      begin
        process_hash_row(orig_hashes.first) ## re keys for headers.
        orig_hashes.each { |hsh| process_hash_row(hsh) } ## re values
      rescue StandardError => error
        @error_rows << { row_id: @row_id, text: error.message }
      end
      compile_results ## handles returns.
    end

    ### process_hash_row - helper VALIDATE HASHES ###
    ### Converts hash keys and vals into parsed line.
    # While no headers are known the hash's keys are used; afterwards its
    # values are. The row id comes from hsh[:row_id]; hashes without one get
    # the next sequential id so each row keeps its own result hash.
    #
    # NOTE(review): values are joined with "," and later split on ",", so a
    # value that itself contains a comma spills into the following columns.
    def process_hash_row(hsh)
      if @headers.any?
        keys_or_values = hsh.values
        @row_id = hsh[:row_id] || (@row_id.to_i + 1)
      else
        keys_or_values = hsh.keys.map(&:to_s)
      end

      file_line = keys_or_values.join(',')
      line_parse(utf_filter(check_utf(file_line)))
    end

    ### line_parse - helper VALIDATE HASHES ###
    ### Parses line to row, then updates final results.
    def line_parse(validated_line)
      return unless validated_line
      row = validated_line.split(',')
      return unless row.any?
      if @headers.empty?
        @headers = row
      else
        @data_hash.merge!(row_to_hsh(row))
        @valid_rows << @data_hash
      end
    end

    #################### * CHECK UTF * ####################
    # Sanitizes one line of text.
    #
    # Returns nil for blank input, otherwise a Hash:
    #   :text    - the input (after seed pollution, when that is enabled)
    #   :encoded - text with invalid and non-ASCII characters removed; set
    #              only when that differs from :text
    #   :wchar   - :encoded with whitespace runs collapsed to one space and
    #              the ends stripped; set only when that differs
    #   :error   - message of any exception raised while sanitizing
    def check_utf(text)
      return if blank_value?(text)
      # The header line is never polluted: pollution starts once headers exist.
      text = @seed.pollute_seeds(text) if @seed && @headers.any?
      results = { text: text, encoded: nil, wchar: nil, error: nil }
      begin
        if !text.valid_encoding?
          encoded = text.chars.select(&:valid_encoding?).join
          # Drops every underscore in the line, which includes the "_"
          # separator that Seed#pollute_seeds inserts.
          encoded.delete!('_')
          encoded = encoded.delete("^\u{0000}-\u{007F}")
        else
          encoded = text.delete("^\u{0000}-\u{007F}")
        end
        wchar = encoded&.gsub(/\s+/, ' ')&.strip
        results[:encoded] = encoded if text != encoded
        results[:wchar] = wchar if encoded != wchar
      rescue StandardError => error
        results[:error] = error.message if error
      end
      results
    end

    #################### * UTF FILTER * ####################
    # Records the outcome of check_utf and picks the line to parse.
    #
    # utf - the Hash returned by check_utf (or nil).
    #
    # Returns the most processed text available (:wchar, else :encoded, else
    # :text), or nil when +utf+ is blank or carries an error.
    def utf_filter(utf)
      return if utf.nil? || utf.empty?
      status_keys = utf.reject { |key, _| key == :text }.compact.keys
      utf_status = status_keys.map(&:to_s).join(', ')
      utf_status = 'perfect' if utf_status.empty?

      # The encoded rows keep the ORIGINAL text of lines that needed re-encoding.
      encoded = utf[:text] if utf[:encoded]
      error = utf[:error]
      line = utf.reject { |key, _| key == :error }.compact.values.last unless error

      @encoded_rows << { row_id: @row_id, text: encoded } if encoded
      if error
        @error_rows << { row_id: @row_id, text: error }
        @defective_rows << { row_id: @row_id, text: utf[:text] }
      end
      # Start a fresh result hash whenever the row id has moved on.
      @data_hash = { row_id: @row_id, utf_status: utf_status } if @data_hash[:row_id] != @row_id
      line
    end

    ############# !! HELPERS BELOW !! #############
    ############# KEY VALUE CONVERTERS #############
    # Pairs the stored headers with +row+ and returns a Hash with symbol keys
    # (a header that cannot be symbolized is kept as-is).
    def row_to_hsh(row)
      @headers.zip(row).each_with_object({}) do |(key, value), hsh|
        hsh[key.respond_to?(:to_sym) ? key.to_sym : key] = value
      end
    end

    # Deletes from +hsh+ (in place) every key not listed in +cols+; returns +hsh+.
    def val_hsh(cols, hsh)
      keys = hsh.keys
      keys.each { |key| hsh.delete(key) unless cols.include?(key) }
      hsh
    end

    # Returns a Hash counting how often each element occurs in +array+;
    # missing keys read as 0.
    def make_groups_from_array(array)
      array.each_with_object(Hash.new(0)) { |e, h| h[e] += 1; }
    end

    private

    # Clears the per-run collectors. @seed is deliberately not touched here;
    # validate_data sets it on every call.
    def reset_state
      @utf_result = { stats: {}, data: {} }
      @valid_rows = []
      @encoded_rows = []
      @defective_rows = []
      @error_rows = []
      @headers = []
      @row_id = 0
      @data_hash = {}
    end

    # Plain-Ruby blank check: nil, false, empty collections and
    # whitespace-only strings are blank. Strings are scrubbed first so
    # invalid bytes cannot make the check itself raise.
    def blank_value?(value)
      return true if value.nil?
      return value.scrub('').strip.empty? if value.is_a?(String)
      value.respond_to?(:empty?) ? value.empty? : !value
    end
  end
end
@@ -1,4 +1,4 @@
1
1
  module Utf8Sanitizer
2
2
  # VERSION = "0.0.1-rc.1"
3
- VERSION = "0.0.2.pre.rc.01"
3
+ VERSION = "0.0.2.pre.rc.02"
4
4
  end
@@ -1,11 +1,14 @@
1
1
  require "utf8_sanitizer/version"
2
+ require 'utf8_sanitizer/run'
3
+ require 'utf8_sanitizer/seed'
4
+ require 'utf8_sanitizer/utf'
5
+ require 'pry'
2
6
 
3
7
  module Utf8Sanitizer
4
8
 
5
9
  def self.run_wrap
6
- puts "Sample the Wrap!"
7
- # wrap = self::Wrap.new
8
- # wrap.run_wrap ## returns formatted urls.
10
+ run = self::Run.new
11
+ result = run.import ## returns formatted urls.
9
12
  end
10
13
 
11
14
  end
@@ -14,8 +14,8 @@ Gem::Specification.new do |spec|
14
14
  spec.homepage = 'https://github.com/4rlm/utf8_sanitizer'
15
15
  spec.license = 'MIT'
16
16
 
17
- spec.summary = "Still in Development: Removes invalid UTF8 characters, and extra whitespace (carriage returns, new lines, tabs, spaces, etc.) from csv, or strings."
18
- spec.description = "Removes invalid UTF8 characters, and extra whitespace (carriage returns, new lines, tabs, spaces, etc.) from csv, or strings.\n Example: ABC Au\\xC1tos,123 E Main St,Anytown,TX,75142,(888) 555-1234\\n\\r\\n \n=> ABC Autos,123 E Main St,Anytown,TX,75142,(888) 555-1234"
17
+ spec.summary = "Still in BETA: Removes invalid UTF8 characters, and extra whitespace (carriage returns, new lines, tabs, spaces, etc.) from csv, or strings."
18
+ spec.description = "Still in BETA: Removes invalid UTF8 characters, and extra whitespace (carriage returns, new lines, tabs, spaces, etc.) from csv, or strings.\n Example: ABC Au\\xC1tos,123 E Main St,Anytown,TX,75142,(888) 555-1234\\n\\r\\n => ABC Autos,123 E Main St,Anytown,TX,75142,(888) 555-1234"
19
19
 
20
20
  if spec.respond_to?(:metadata)
21
21
  spec.metadata['allowed_push_host'] = "https://rubygems.org"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: utf8_sanitizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2.pre.rc.01
4
+ version: 0.0.2.pre.rc.02
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adam Booth
@@ -180,10 +180,9 @@ dependencies:
180
180
  - - "~>"
181
181
  - !ruby/object:Gem::Version
182
182
  version: 0.11.3
183
- description: "Removes invalid UTF8 characters, and extra whitespace (carriage returns,
184
- new lines, tabs, spaces, etc.) from csv, or strings.\n Example: ABC Au\\xC1tos,123
185
- E Main St,Anytown,TX,75142,(888) 555-1234\\n\\r\\n \n=> ABC Autos,123 E Main St,Anytown,TX,75142,(888)
186
- 555-1234"
183
+ description: |-
184
+ Still in BETA: Removes invalid UTF8 characters, and extra whitespace (carriage returns, new lines, tabs, spaces, etc.) from csv, or strings.
185
+ Example: ABC Au\xC1tos,123 E Main St,Anytown,TX,75142,(888) 555-1234\n\r\n => ABC Autos,123 E Main St,Anytown,TX,75142,(888) 555-1234
187
186
  email:
188
187
  - 4rlm@protonmail.ch
189
188
  executables: []
@@ -201,6 +200,16 @@ files:
201
200
  - bin/console
202
201
  - bin/setup
203
202
  - lib/utf8_sanitizer.rb
203
+ - lib/utf8_sanitizer/csv/extensions.csv
204
+ - lib/utf8_sanitizer/csv/seeds_clean.csv
205
+ - lib/utf8_sanitizer/csv/seeds_dirty.csv
206
+ - lib/utf8_sanitizer/csv/seeds_mega.csv
207
+ - lib/utf8_sanitizer/csv/seeds_mini.csv
208
+ - lib/utf8_sanitizer/csv/seeds_mini_10.csv
209
+ - lib/utf8_sanitizer/csv/seeds_mini_2_bug.csv
210
+ - lib/utf8_sanitizer/run.rb
211
+ - lib/utf8_sanitizer/seed.rb
212
+ - lib/utf8_sanitizer/utf.rb
204
213
  - lib/utf8_sanitizer/version.rb
205
214
  - utf8_sanitizer.gemspec
206
215
  - utf8_sanitizer_gemspec_orig.txt
@@ -228,6 +237,6 @@ rubyforge_project:
228
237
  rubygems_version: 2.7.6
229
238
  signing_key:
230
239
  specification_version: 4
231
- summary: 'Still in Development: Removes invalid UTF8 characters, and extra whitespace
232
- (carriage returns, new lines, tabs, spaces, etc.) from csv, or strings.'
240
+ summary: 'Still in BETA: Removes invalid UTF8 characters, and extra whitespace (carriage
241
+ returns, new lines, tabs, spaces, etc.) from csv, or strings.'
233
242
  test_files: []