piplcollector 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/piplcollector.rb +116 -0
  3. metadata +45 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 3586ce00624a23fabe60ec6135415221723b8ec0
4
+ data.tar.gz: 41dfc1accf74bc77c0c8a09ad0374f0b622d8592
5
+ SHA512:
6
+ metadata.gz: 43229a308129b7211ffeb4401fff72ed2a6d46479c300620431de696a675f9679d00975ad4eb506d765593c018fa4b859f82249174baac0ea884e530713363c1
7
+ data.tar.gz: fcc01c2ad1ec8c8029b9d8cc605de476f481b2784e5adffb47839e3a64ddbaee915a1ff4a477303498d242209f2a5ab85983f3ee8af3f64b0ea1826580b7ee27
@@ -0,0 +1,116 @@
1
+ require 'piplrequest'
2
+ require 'json'
3
+ require 'pry'
4
+
5
# Walks a directory tree of JSON files, attaches Pipl data to every record and
# writes the enriched files to a parallel output tree. The raw response for
# each record is cached on disk (one file per record id) so that a record is
# only requested once.
class PiplCollector
  # input_dir::         root of the tree handed to #run; the first occurrence of
  #                     this string in a path is replaced by output_append_dir
  # output_dir::        cache directory holding one raw response file per id
  # output_append_dir:: root of the tree the enriched files are written to
  # id_field::          key of the record value used as cache id
  # ignore_files::      substring, list of substrings, or nil; a file whose name
  #                     contains one of them is skipped by #run
  # api_key::           handed to PiplRequest unchanged
  # field_mapping::     handed to PiplRequest unchanged
  def initialize(input_dir, output_dir, output_append_dir, id_field, ignore_files, api_key, field_mapping)
    @input_dir = input_dir
    @output_dir = output_dir
    @output_append_dir = output_append_dir
    @id_field = id_field
    @ignore_files = ignore_files
    @api_key = api_key
    @field_mapping = field_mapping
    @already_collected = load_output_files
  end

  # Lists the ids that already have a cached response in the cache directory.
  # Returns an Array of file names with a trailing ".json" removed.
  def load_output_files
    # A cache directory that does not exist yet just means nothing is cached;
    # it is created on the first save.
    return [] unless File.directory?(@output_dir)

    collected = []
    Dir.foreach(@output_dir) do |file|
      next if file == '.' || file == '..'

      # Strip only the extension, not a ".json" in the middle of the name.
      collected.push(File.basename(file, '.json'))
    end
    collected
  end

  # Writes output_item (the raw response text) to the cache under the id of
  # data_item and remembers the id as collected.
  def save_output_file(output_item, data_item)
    id = gen_filename_from_id(data_item)
    create_write_dirs(@output_dir)
    File.write(File.join(@output_dir, "#{id}.json"), output_item)
    @already_collected.push(id)
  end

  # Generates a file-safe name from the id field by dropping every ":", "/"
  # and "." character. Non-String ids are converted with to_s.
  # Raises ArgumentError when the record has no value for the id field.
  def gen_filename_from_id(data_item)
    id = data_item[@id_field]
    raise ArgumentError, "record has no value for id field #{@id_field.inspect}" if id.nil?

    id.to_s.delete(':/.')
  end

  # True when a response for data_item is already in the cache.
  def was_collected?(data_item)
    @already_collected.include?(gen_filename_from_id(data_item))
  end

  # Requests data for data_item through PiplRequest, caches the raw response
  # and returns it parsed. Returns nil when PiplRequest#get_data yields nothing.
  def get_person(data_item)
    # NOTE(review): presumably throttles requests to the API - confirm.
    sleep(1)

    request = PiplRequest.new(@api_key, @field_mapping)
    output = request.get_data(data_item)
    return unless output

    save_output_file(output, data_item)
    JSON.parse(output)
  end

  # Reads and parses the cached response for data_item.
  def get_already_collected_person(data_item)
    filename = File.join(@output_dir, "#{gen_filename_from_id(data_item)}.json")
    JSON.parse(File.read(filename))
  end

  # Parses the JSON array in file, attaches the Pipl data of every record under
  # the "pipl" key and returns the result as pretty-printed JSON text.
  def process(file)
    data = JSON.parse(File.read(file))

    enriched = data.map do |item|
      # String key: the parsed records are String-keyed, so a Symbol key would
      # produce a second "pipl" entry when the record already has one.
      item['pipl'] = was_collected?(item) ? get_already_collected_person(item) : get_person(item)
      item
    end

    JSON.pretty_generate(enriched)
  end

  # Creates dir and any missing parent directories. Works for absolute and
  # relative paths; does nothing when dir is empty or already exists.
  def create_write_dirs(dir)
    return if dir.empty? || File.directory?(dir)

    parent = File.dirname(dir)
    create_write_dirs(parent) unless parent == dir
    Dir.mkdir(dir)
  end

  # Returns the path file should be written to when it was read from dir.
  def get_write_dir(dir, file)
    File.join(output_dir_for(dir), file)
  end

  # Recursively processes every ".json" file below dir that is not ignored and
  # has no output file yet, and writes the enriched copy to the output tree.
  def run(dir)
    Dir.foreach(dir) do |file|
      next if file == '.' || file == '..'

      path = File.join(dir, file)
      if File.directory?(path)
        run(path)
      elsif collectable_file?(file)
        target = get_write_dir(dir, file)
        next if File.exist?(target)

        with_pipl = process(path)
        create_write_dirs(output_dir_for(dir))
        File.write(target, with_pipl)
      end
    end
  end

  private

  # Maps a directory of the input tree to its counterpart in the output tree.
  # Only the first occurrence is replaced, and the block form keeps
  # backslashes in the replacement literal.
  def output_dir_for(dir)
    dir.sub(@input_dir) { @output_append_dir }
  end

  # True for file names that end in ".json" and contain none of the ignore
  # substrings. A nil ignore setting ignores nothing.
  def collectable_file?(file)
    return false unless file.end_with?('.json')

    Array(@ignore_files).none? { |pattern| file.include?(pattern) }
  end
end
metadata ADDED
@@ -0,0 +1,45 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: piplcollector
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - M. C. McGrath
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-01-21 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Gets data from Pipl for dir of files
14
+ email: shidash@shidash.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - lib/piplcollector.rb
20
+ homepage: https://github.com/TransparencyToolkit/piplcollector
21
+ licenses:
22
+ - GPL
23
+ metadata: {}
24
+ post_install_message:
25
+ rdoc_options: []
26
+ require_paths:
27
+ - lib
28
+ required_ruby_version: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ required_rubygems_version: !ruby/object:Gem::Requirement
34
+ requirements:
35
+ - - ">="
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ requirements: []
39
+ rubyforge_project:
40
+ rubygems_version: 2.4.8
41
+ signing_key:
42
+ specification_version: 4
43
+ summary: Gets data from Pipl for dir of files
44
+ test_files: []
45
+ has_rdoc: