piplcollector 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/piplcollector.rb +116 -0
  3. metadata +45 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 3586ce00624a23fabe60ec6135415221723b8ec0
4
+ data.tar.gz: 41dfc1accf74bc77c0c8a09ad0374f0b622d8592
5
+ SHA512:
6
+ metadata.gz: 43229a308129b7211ffeb4401fff72ed2a6d46479c300620431de696a675f9679d00975ad4eb506d765593c018fa4b859f82249174baac0ea884e530713363c1
7
+ data.tar.gz: fcc01c2ad1ec8c8029b9d8cc605de476f481b2784e5adffb47839e3a64ddbaee915a1ff4a477303498d242209f2a5ab85983f3ee8af3f64b0ea1826580b7ee27
@@ -0,0 +1,116 @@
1
+ require 'piplrequest'
2
+ require 'json'
3
+ require 'pry'
4
+
5
# Walks a directory tree of JSON files, attaches Pipl lookup results to every
# record, and writes the enriched files to a mirrored output tree.
#
# Raw Pipl responses are cached as one JSON file per record id in output_dir,
# so a record whose id is already cached is not requested again.
class PiplCollector
  # Extension of both the input files that get processed and the cache files.
  JSON_EXT = ".json".freeze

  # input_dir         - root of the tree that holds the source JSON files
  # output_dir        - directory holding one cached Pipl response per record id
  # output_append_dir - root of the tree the enriched files are written to
  # id_field          - key in each record whose value identifies the record
  # ignore_files      - substring (or list of substrings) of file names to skip;
  #                     nil means nothing is skipped
  # api_key           - passed through to PiplRequest
  # field_mapping     - passed through to PiplRequest
  def initialize(input_dir, output_dir, output_append_dir, id_field, ignore_files, api_key, field_mapping)
    @input_dir = input_dir
    @output_dir = output_dir
    @output_append_dir = output_append_dir
    @id_field = id_field
    @ignore_files = ignore_files
    @api_key = api_key
    @field_mapping = field_mapping
    @already_collected = load_output_files
  end

  # Returns the ids (file names without the .json extension) of every cached
  # response in the output directory. A missing directory simply means nothing
  # has been collected yet, so it yields an empty list instead of raising.
  def load_output_files
    return [] unless File.directory?(@output_dir)

    Dir.entries(@output_dir)
       .select { |entry| File.extname(entry) == JSON_EXT }
       .map { |entry| File.basename(entry, JSON_EXT) }
  end

  # Writes the raw response for data_item into the cache directory (creating
  # the directory on first use) and records its id as collected.
  def save_output_file(output_item, data_item)
    id = gen_filename_from_id(data_item)
    create_write_dirs(@output_dir)
    File.write("#{@output_dir}/#{id}#{JSON_EXT}", output_item)
    @already_collected.push(id)
  end

  # Generates a file-safe name from the id field by dropping ':', '/' and '.'.
  # Non-String ids are converted with to_s. Raises ArgumentError when the
  # record has no id, because every id-less record would otherwise share a
  # single cache file.
  def gen_filename_from_id(data_item)
    id = data_item[@id_field]
    raise ArgumentError, "record has no value for id field #{@id_field.inspect}" if id.nil?

    id.to_s.delete(":/.")
  end

  # Checks whether a response for this record is already cached.
  def was_collected?(data_item)
    @already_collected.include?(gen_filename_from_id(data_item))
  end

  # Requests the record from Pipl, caches the raw response and returns it
  # parsed. Returns nil when PiplRequest#get_data yields nothing.
  def get_person(data_item)
    # One-second pause before every request (presumably to stay under an API
    # rate limit -- confirm against the Pipl terms).
    sleep(1)

    request = PiplRequest.new(@api_key, @field_mapping)
    output = request.get_data(data_item)
    return unless output

    # Parse before caching so a malformed response is never stored and then
    # re-read on every later run.
    parsed = JSON.parse(output)
    save_output_file(output, data_item)
    parsed
  end

  # Reads the cached response for an already collected record.
  def get_already_collected_person(data_item)
    path = "#{@output_dir}/#{gen_filename_from_id(data_item)}#{JSON_EXT}"
    JSON.parse(File.read(path))
  end

  # Parses the JSON file, attaches the Pipl result to every record under the
  # "pipl" key and returns the whole list as pretty-printed JSON.
  def process(file)
    data = JSON.parse(File.read(file))

    enriched = data.map do |item|
      # String key on purpose: parsed JSON uses string keys, and a symbol key
      # would serialize as a second, duplicate "pipl" entry when the record
      # already carries one.
      item["pipl"] = was_collected?(item) ? get_already_collected_person(item) : get_person(item)
      item
    end

    JSON.pretty_generate(enriched)
  end

  # Creates dir and any missing parent directories. Absolute paths stay
  # absolute and relative paths stay relative to the working directory.
  def create_write_dirs(dir)
    path = dir.start_with?("/") ? "/" : ""
    dir.split("/").reject(&:empty?).each do |segment|
      path = (path.empty? || path == "/") ? "#{path}#{segment}" : "#{path}/#{segment}"
      Dir.mkdir(path) unless File.directory?(path)
    end
  end

  # Returns the path the enriched version of dir/file is written to.
  def get_write_dir(dir, file)
    "#{output_dir_for(dir)}/#{file}"
  end

  # Recursively processes every .json file below dir that is not ignored and
  # has no enriched counterpart yet.
  def run(dir)
    Dir.foreach(dir) do |file|
      next if file == "." || file == ".."

      path = "#{dir}/#{file}"
      if File.directory?(path)
        run(path)
      elsif processable?(file)
        target = get_write_dir(dir, file)
        next if File.exist?(target)

        with_pipl = process(path)
        create_write_dirs(output_dir_for(dir))
        File.write(target, with_pipl)
      end
    end
  end

  private

  # Maps a directory of the input tree to its counterpart in the output tree.
  # Only the first occurrence of the input root is replaced, so a later path
  # segment that happens to repeat it is left alone. The block form keeps
  # backslashes in the replacement literal.
  def output_dir_for(dir)
    dir.sub(@input_dir) { @output_append_dir }
  end

  # True for files with a .json extension whose name contains none of the
  # ignore patterns.
  def processable?(file)
    File.extname(file) == JSON_EXT &&
      Array(@ignore_files).none? { |pattern| file.include?(pattern) }
  end
end
metadata ADDED
@@ -0,0 +1,45 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: piplcollector
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - M. C. McGrath
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-01-21 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Gets data from Pipl for dir of files
14
+ email: shidash@shidash.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - lib/piplcollector.rb
20
+ homepage: https://github.com/TransparencyToolkit/piplcollector
21
+ licenses:
22
+ - GPL
23
+ metadata: {}
24
+ post_install_message:
25
+ rdoc_options: []
26
+ require_paths:
27
+ - lib
28
+ required_ruby_version: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ required_rubygems_version: !ruby/object:Gem::Requirement
34
+ requirements:
35
+ - - ">="
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ requirements: []
39
+ rubyforge_project:
40
+ rubygems_version: 2.4.8
41
+ signing_key:
42
+ specification_version: 4
43
+ summary: Gets data from Pipl for dir of files
44
+ test_files: []
45
+ has_rdoc: