nesstar-api 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
module Nesstar

  #Plain value object describing a catalog: its studies, label and description.
  #All attributes are readable and writable and are nil until assigned.
  class Catalog

    attr_accessor :studies, :label, :description

  end

end
@@ -0,0 +1,11 @@
1
module Nesstar

  #Value information for the rows in a variable
  #Holds the category value, its label and the list of statistics attached
  #to it. All attributes are readable and writable and nil until assigned.
  class Category

    attr_accessor :value, :label, :category_statistics

  end

end
@@ -0,0 +1,11 @@
1
module Nesstar

  #Stats about a category belonging to a variable
  #A single statistic is a type/value pair; both attributes are readable
  #and writable and nil until assigned.
  class CategoryStatistic

    attr_accessor :type, :value

  end

end
@@ -0,0 +1,11 @@
1
module Nesstar

  #Contains a set of variables and belongs to a catalog
  #Plain value object; every attribute is readable and writable and nil
  #until assigned.
  class Study

    attr_accessor :variables, :abstract, :title, :id, :dates, :sampling_procedure, :weight

  end

end
@@ -0,0 +1,11 @@
1
module Nesstar

  #Dates that are important to the study eg. start date or end date
  #type names the kind of date and date holds its value; both are readable
  #and writable and nil until assigned.
  class StudyDate

    attr_accessor :type, :date

  end

end
@@ -0,0 +1,10 @@
1
module Nesstar

  #A summary statistic of a variable, stored as a type/value pair.
  #Both attributes are readable and writable and nil until assigned.
  class SummaryStat

    attr_accessor :type, :value

  end

end
@@ -0,0 +1,11 @@
1
module Nesstar

  #Information about a variable/column in a dataset
  #Plain value object; every attribute is readable and writable and nil
  #until assigned.
  class Variable

    attr_accessor :name, :label, :group, :id, :file, :interval, :max, :min,
                  :question, :interview_instruction, :summary_stats, :categories

  end

end
@@ -0,0 +1,5 @@
1
module Nesstar
  module Api
    #Version of the nesstar-api gem; frozen so it cannot be mutated in place
    VERSION = "0.0.1".freeze
  end
end
@@ -0,0 +1,347 @@
1
+ require 'rubygems'
2
+ require 'zlib'
3
+ require 'net/http'
4
+ require 'stringio'
5
+ require 'cgi'
6
+ require 'libxml'
7
+ require 'nokogiri'
8
+ require 'tree'
9
+ require 'nesstar-api/catalog'
10
+ require 'nesstar-api/category'
11
+ require 'nesstar-api/category_statistic'
12
+ require 'nesstar-api/study'
13
+ require 'nesstar-api/study_date'
14
+ require 'nesstar-api/variable'
15
+ require 'nesstar-api/summary_stat'
16
+
17
+ module Nesstar
18
+ module Api
19
+
20
+ class CatalogApi
21
+
22
#given tree of datasets and catalogs figure out which are the
#survey types, which are surveys and link them to child datasets
#
#node is any object responding to children; each child must respond to
#name and parent. A child whose name contains 'fStudy' is a dataset, its
#parent is the survey and the survey's parent is the survey type.
#surveys_hash is filled with survey name => [dataset names]
#survey_types_hash is filled with survey type name => [survey names],
#using the key 'none' for surveys that have no parent
def parse_surveys_from_nodes node, surveys_hash, survey_types_hash
  node.children.each do |child|
    unless child.name.index('fStudy')
      #its a catalog so keep going downwards
      parse_surveys_from_nodes child, surveys_hash, survey_types_hash
      next
    end

    #this is a dataset, find its survey and survey_type
    survey = child.parent
    (surveys_hash[survey.name] ||= []).push(child.name)

    survey_type = survey.parent
    type_name = survey_type ? survey_type.name : 'none'
    surveys_of_type = (survey_types_hash[type_name] ||= [])
    #a survey is listed once per type no matter how many datasets it has
    surveys_of_type.push(survey.name) unless surveys_of_type.include?(survey.name)
  end
end
61
+
62
#collect the datasets of a catalog and of every catalog nested below it
#url is the address of the nesstar server, catalog is the catalog name
#returns a Hash of catalog name => array of dataset links
def get_catalog url, catalog
  #Hash of catalogs to their child datasets
  catalog_hash = {}

  #retrieve_html builds and fetches the same browser listing that used to
  #be duplicated inline here
  doc = retrieve_html url, catalog
  parse_out_datasets doc, URI.parse(url).host, catalog, catalog_hash

  catalog_hash
end
77
+
78
#build a tree of a catalog, its nested catalogs and their datasets
#url is the address of the nesstar server, catalog is the catalog name
#returns the root Tree::TreeNode, named after the catalog
def get_nodes url, catalog
  #tree of catalogs to their child datasets
  root_tree_node = Tree::TreeNode.new(catalog, "Catalog Content")

  #retrieve_html builds and fetches the same browser listing that used to
  #be duplicated inline here
  doc = retrieve_html url, catalog
  dataset_tree doc, URI.parse(url).host, catalog, root_tree_node

  root_tree_node
end
92
+
93
#get the ddi xml for a nesstar dataset
#uri is the address of the nesstar server, dataset is appended to /obj/fStudy/
#the response body is treated as gzip data and returned decompressed
def get_ddi uri, dataset
  ddi_uri = URI.parse(uri)
  ddi_uri.merge!("/obj/fStudy/" + dataset)
  ddi_uri.merge!('?http://www.nesstar.org/rdf/method=http://www.nesstar.org/rdf/Dataset/GetDDI')
  compressed = StringIO.new(Net::HTTP.get(ddi_uri))
  #block form closes the reader once the body has been read
  Zlib::GzipReader.wrap(compressed) { |gz| gz.read }
end
103
+
104
#return a catalog object with information inside it
#uri is something like http://nesstar.here.com
#catalog is the name of the catalog eg Catalog20
#label and description are taken from the first s:label and s:comment
#elements of the response and stay nil when the element is missing
def get_catalog_information uri, catalog
  catalog_uri = URI.parse(uri)
  catalog_uri.merge!("/obj/fCatalog/" + catalog)
  compressed = StringIO.new(Net::HTTP.get(catalog_uri))
  #block form closes the reader once the body has been read
  catalog_xml = Zlib::GzipReader.wrap(compressed) { |gz| gz.read }
  doc = Nokogiri::XML(catalog_xml)
  label = doc.xpath('//s:label')[0]
  description = doc.xpath('//s:comment')[0]
  #separate local so the catalog name parameter is not overwritten
  catalog_info = Nesstar::Catalog.new
  catalog_info.label = label.content.strip unless label.nil?
  catalog_info.description = description.content.strip unless description.nil?
  catalog_info
end
121
+
122
#information about the dataset only
#uri is the address of the nesstar server, dataset is appended to /obj/fStudy/
#title and abstract are taken from the first s:label and s:comment
#elements of the response and stay nil when the element is missing
def get_simple_study_information uri, dataset
  dataset_uri = URI.parse(uri)
  dataset_uri.merge!("/obj/fStudy/" + dataset)
  compressed = StringIO.new(Net::HTTP.get(dataset_uri))
  #block form closes the reader once the body has been read
  dataset_xml = Zlib::GzipReader.wrap(compressed) { |gz| gz.read }
  doc = Nokogiri::XML(dataset_xml)
  label = doc.xpath('//s:label')[0]
  description = doc.xpath('//s:comment')[0]
  study = Nesstar::Study.new
  study.title = label.content.strip unless label.nil?
  study.abstract = description.content.strip unless description.nil?
  study
end
137
+
138
#information about the dataset and its variables
#inputs are the uri of the nesstar server and the dataset name, both are
#handed to get_ddi to fetch the ddi xml
#returns a Nesstar::Study; title, id and sampling_procedure stay nil when
#the matching ddi element is missing or empty
def get_study_information uri, dataset
  doc = LibXML::XML::Parser.string(get_ddi(uri, dataset)).parse
  study_node = doc.find('//stdyDscr')[0]
  study = Nesstar::Study.new

  #every abstract element contributes its stripped text, in document order
  abstract_parts = []
  study_node.find('//abstract').each do |ab|
    abstract_parts.push(ab.first.content.strip) unless ab.first.nil?
  end
  study.abstract = abstract_parts.join
  study.title = ddi_first_text study_node, '//stdyDscr/citation/titlStmt/titl'
  study.id = ddi_first_text study_node, '//IDNo'

  #start and finish dates for study
  dates = []
  study_node.find('//sumDscr/collDate').each do |d|
    date_attributes = d.attributes
    event = date_attributes.get_attribute('event')
    date = date_attributes.get_attribute('date')
    study_date = Nesstar::StudyDate.new
    study_date.type = event.value.strip unless event.nil?
    study_date.date = date.value.strip unless date.nil?
    dates.push(study_date)
  end
  study.dates = dates
  study.sampling_procedure = ddi_first_text study_node, '//sampProc'
  # study.weight = studynodes[0].find('//sampProc')[0].first.content
  study.variables = get_variable_information doc
  study
end

#stripped text of the first child of the first node matching xpath,
#or nil when nothing matches or the match has no children
def ddi_first_text node, xpath
  match = node.find(xpath)[0]
  return nil if match.nil? || match.first.nil?
  match.first.content.strip
end
private :ddi_first_text
179
+
180
#placeholder with no behaviour yet, takes no arguments and returns nil
def get_children
  nil
end
183
+
184
#placeholder with no behaviour yet, takes no arguments and returns nil
def add_dataset
  nil
end
187
+
188
+ private
189
+
190
#pull out all the datasets for a catalog, recursively if need be
#doc is the parsed listing page of catalog, uri is the server host name
#and tree_node is the node the children are attached to
#ancestors holds the names of the catalogs on the path from the root down
#to catalog, a link back to one of them is skipped so that catalogs which
#reference each other cannot recurse for ever
#returns tree_node
def dataset_tree doc, uri, catalog, tree_node, ancestors = [catalog]
  doc.xpath('//a').each do |link|
    target = link.content
    #links mentioning the current catalog are never followed or listed
    next if target.index(catalog)

    if target.index('fCatalog')
      #its a new catalog
      url = URI.parse(target)
      new_catalog = url.path.split('/').last
      next if ancestors.include?(new_catalog)
      catalog_doc = retrieve_html "http://" + url.host + ':' + url.port.to_s, new_catalog
      child_node = Tree::TreeNode.new(target, catalog + " child")
      tree_node << child_node
      dataset_tree catalog_doc, url.host, new_catalog, child_node, ancestors + [new_catalog]
    elsif target.index(uri)
      #its a dataset
      tree_node << Tree::TreeNode.new(target, catalog + " child")
    end
  end
  tree_node
end
213
+
214
#pull out all the datasets for a catalog, recursively if need be
#doc is the parsed listing page of catalog, uri is the server host name
#catalog_hash is filled with catalog name => array of dataset links
#a nested catalog that already has an entry in catalog_hash is not
#fetched again, which stops catalogs that reference each other from
#recursing for ever and stops datasets from being listed twice
#returns catalog_hash
def parse_out_datasets doc, uri, catalog, catalog_hash
  catalog_hash[catalog] ||= []
  doc.xpath('//a').each do |link|
    target = link.content
    #links mentioning the current catalog are never followed or listed
    next if target.index(catalog)

    if target.index('fCatalog')
      #its a new catalog
      url = URI.parse(target)
      new_catalog = url.path.split('/').last
      next if catalog_hash.has_key?(new_catalog)
      catalog_doc = retrieve_html "http://" + url.host + ':' + url.port.to_s, new_catalog
      parse_out_datasets catalog_doc, url.host, new_catalog, catalog_hash
    elsif target.index(uri)
      #its a dataset
      catalog_hash[catalog].push(target)
    end
  end
  catalog_hash
end
234
+
235
#fetch the browser listing of the children of a catalog
#url is the address of the nesstar server, catalog is the catalog name
#returns the response parsed with Nokogiri::HTML
def retrieve_html url, catalog
  server = URI.parse(url)
  #escaped address of the catalog's children on the same host and port
  children_url = CGI.escape('http://') + CGI.escape(server.host + ':' + server.port.to_s) + CGI.escape("/obj/fCatalog/" + catalog + "@children")
  listing_path = '/browser/browser?action=LIST&path=ROOT' + CGI.escape('|Properties|children') + "&url=" + children_url
  Nokogiri::HTML(Net::HTTP.get(server.merge(listing_path)))
end
243
+
244
#information about the variables
#doc is the parsed ddi document; returns an array with one
#Nesstar::Variable for each //dataDscr/var element, or an empty array
#when the document has no dataDscr element
def get_variable_information doc
  variables = []
  data_description = doc.find('//dataDscr')[0]
  return variables if data_description.nil?

  groups = variable_groups_from data_description
  data_description.find('//dataDscr/var').each do |var|
    variables.push(variable_from(var, groups))
  end
  variables
end

#hash of variable group label => ids listed in the group's var attribute
#groups without a var attribute or without a label are left out
def variable_groups_from data_description
  groups = {}
  data_description.find('//dataDscr/varGrp').each do |vargroup|
    members = vargroup.attributes.get_attribute('var')
    label = vargroup.find('./labl')[0]
    next if members.nil? || label.nil? || label.first.nil?
    groups[label.first.content] = members.value.split(' ')
  end
  groups
end

#build a Nesstar::Variable from a var element
#groups is the hash produced by variable_groups_from
def variable_from var, groups
  variable = Nesstar::Variable.new
  var_attr = var.attributes
  variable.id = stripped_attribute var_attr, 'ID'
  variable.name = stripped_attribute var_attr, 'name'
  variable.file = stripped_attribute var_attr, 'files'
  variable.interval = stripped_attribute var_attr, 'intrvl'

  label = var.find('./labl')[0]
  variable.label = label.content.strip unless label.nil?

  #max and min are attributes of the first child of valrng
  value_range = var.find('./valrng')[0]
  unless value_range.nil? || value_range.first.nil?
    range_attr = value_range.first.attributes
    variable.max = stripped_attribute range_attr, 'max'
    variable.min = stripped_attribute range_attr, 'min'
  end

  question = var.find('./qstn')[0]
  unless question.nil?
    variable.question = first_child_text question.find('./qstnLit')[0]
    variable.interview_instruction = first_child_text question.find('./ivuInstr')[0]
  end

  variable.summary_stats = summary_stats_from var
  variable.categories = categories_from var

  #what group is the variable in
  groups.each do |group_label, member_ids|
    if member_ids.include?(variable.id)
      variable.group = group_label.strip
      break
    end
  end
  variable
end

#one Nesstar::SummaryStat for each sumStat child of var
def summary_stats_from var
  summary_stats = []
  var.find('./sumStat').each do |stat|
    statistic = Nesstar::SummaryStat.new
    statistic.type = stripped_attribute stat.attributes, 'type'
    statistic.value = first_child_text stat
    summary_stats.push(statistic)
  end
  summary_stats
end

#one Nesstar::Category for each catgry child of var
#categories in ddi are value domains in mb
def categories_from var
  categories = []
  var.find('./catgry').each do |cat|
    category = Nesstar::Category.new
    value_node = cat.find('./catValu').first
    category.value = value_node.content unless value_node.nil?
    category.label = first_child_text cat.find('./labl')[0]
    category.category_statistics = category_statistics_from cat
    #store the category itself, it carries its own statistics
    categories.push(category)
  end
  categories
end

#one Nesstar::CategoryStatistic for each non-empty catStat child of cat
#type comes from the catStat element's own type attribute and value from
#its text, the same way sumStat elements are read
def category_statistics_from cat
  category_statistics = []
  cat.find('./catStat').each do |stat|
    #an element without content has nothing to record
    next if stat.first.nil?
    category_statistic = Nesstar::CategoryStatistic.new
    category_statistic.type = stripped_attribute stat.attributes, 'type'
    category_statistic.value = stat.first.content.strip
    category_statistics.push(category_statistic)
  end
  category_statistics
end

#stripped value of the named attribute, nil when the attribute is absent
def stripped_attribute attributes, name
  attribute = attributes.get_attribute(name)
  attribute.nil? ? nil : attribute.value.strip
end

#stripped content of the first child of node, nil when node is nil or
#has no children
def first_child_text node
  return nil if node.nil? || node.first.nil?
  node.first.content.strip
end
344
+
345
+ end
346
+ end
347
+ end
metadata ADDED
@@ -0,0 +1,123 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: nesstar-api
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Ian Dunlop
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-02-24 00:00:00 +00:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: nokogiri
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - "="
28
+ - !ruby/object:Gem::Version
29
+ hash: 15
30
+ segments:
31
+ - 1
32
+ - 4
33
+ - 4
34
+ version: 1.4.4
35
+ type: :runtime
36
+ version_requirements: *id001
37
+ - !ruby/object:Gem::Dependency
38
+ name: rubytree
39
+ prerelease: false
40
+ requirement: &id002 !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - "="
44
+ - !ruby/object:Gem::Version
45
+ hash: 61
46
+ segments:
47
+ - 0
48
+ - 8
49
+ - 1
50
+ version: 0.8.1
51
+ type: :runtime
52
+ version_requirements: *id002
53
+ - !ruby/object:Gem::Dependency
54
+ name: libxml-ruby
55
+ prerelease: false
56
+ requirement: &id003 !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - "="
60
+ - !ruby/object:Gem::Version
61
+ hash: 27
62
+ segments:
63
+ - 1
64
+ - 1
65
+ - 4
66
+ version: 1.1.4
67
+ type: :runtime
68
+ version_requirements: *id003
69
+ description: This gem provides access to NESSTAR API calls from a data provider
70
+ email:
71
+ - ian.dunlop@manchester.ac.uk
72
+ executables: []
73
+
74
+ extensions: []
75
+
76
+ extra_rdoc_files: []
77
+
78
+ files:
79
+ - lib/nesstar-api/catalog.rb
80
+ - lib/nesstar-api/category.rb
81
+ - lib/nesstar-api/category_statistic.rb
82
+ - lib/nesstar-api/study.rb
83
+ - lib/nesstar-api/study_date.rb
84
+ - lib/nesstar-api/summary_stat.rb
85
+ - lib/nesstar-api/variable.rb
86
+ - lib/nesstar-api/version.rb
87
+ - lib/nesstar-api.rb
88
+ has_rdoc: true
89
+ homepage: http://github.com/mygrid/methodbox/nesstar-api
90
+ licenses: []
91
+
92
+ post_install_message:
93
+ rdoc_options: []
94
+
95
+ require_paths:
96
+ - lib
97
+ required_ruby_version: !ruby/object:Gem::Requirement
98
+ none: false
99
+ requirements:
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ hash: 3
103
+ segments:
104
+ - 0
105
+ version: "0"
106
+ required_rubygems_version: !ruby/object:Gem::Requirement
107
+ none: false
108
+ requirements:
109
+ - - ">="
110
+ - !ruby/object:Gem::Version
111
+ hash: 3
112
+ segments:
113
+ - 0
114
+ version: "0"
115
+ requirements: []
116
+
117
+ rubyforge_project: nesstar-api
118
+ rubygems_version: 1.3.7
119
+ signing_key:
120
+ specification_version: 3
121
+ summary: Simple API for calling NESSTAR API and returning results
122
+ test_files: []
123
+