setlr 0.2.2__tar.gz → 0.2.4__tar.gz

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -27,6 +27,8 @@ from itertools import chain
27
27
  import zipfile
28
28
  import gzip
29
29
 
30
+ import logging
31
+
30
32
  import hashlib
31
33
  from slugify import slugify
32
34
 
@@ -135,7 +137,7 @@ def read_csv(location, result):
135
137
  if result.value(csvw.header):
136
138
  args['header'] = [0]
137
139
  df = pandas.read_csv(get_content(location, result),encoding='utf-8', **args)
138
- print "Loaded", location
140
+ logger.debug("Loaded %s", location)
139
141
  return df
140
142
 
141
143
  def read_graph(location, result, g = None):
@@ -153,7 +155,7 @@ def read_graph(location, result, g = None):
153
155
  #print e
154
156
  pass
155
157
  if len(graph) == 0:
156
- print "Could not parse graph: ", location
158
+ logger.error("Could not parse graph: %s", location)
157
159
  if result[RDF.type:OWL.Ontology]:
158
160
  for ontology in graph.subjects(RDF.type, OWL.Ontology):
159
161
  imports = [graph.resource(x) for x in graph.objects(ontology, OWL.imports)]
@@ -210,14 +212,12 @@ def get_content(location, result):
210
212
 
211
213
  def to_tempfile(f):
212
214
  tf = tempfile.TemporaryFile()
213
- print "Writing to disk"
215
+ logger.debug("Writing %s to disk.", f)
214
216
  for chunk in f:
215
217
  if chunk: # filter out keep-alive new chunks
216
218
  tf.write(chunk)
217
- sys.stdout.write(".")
218
- sys.stdout.flush()
219
219
  tf.seek(0)
220
- print "done."
220
+ logger.debug("Finished writing %s to disk.", f)
221
221
  return tf
222
222
 
223
223
  def unpack_zipfile(f):
@@ -248,7 +248,7 @@ def read_xml(location, result):
248
248
  validate_dtd = True
249
249
  f = iterparse_filter.IterParseFilter(validate_dtd=validate_dtd)
250
250
  if result.value(setl.xpath) is None:
251
- print "no xpath to select on!"
251
+ logger.debug("no xpath to select on from %s", location)
252
252
  f.iter_end("/*")
253
253
  for xp in result[setl.xpath]:
254
254
  f.iter_end(xp.value)
@@ -327,7 +327,7 @@ def load_csv(csv_resource):
327
327
  lit = Literal(value, datatype=datatype.identifier)
328
328
  #print i, prop.identifier, lit.n3()
329
329
  res.add(prop.identifier, lit)
330
- print "Table has", len(s), "rows,", len(header), "columns, and", len(csv_graph), "triples."
330
+ logger.debug("Table has %s rows, %s columns, and %s triples", len(s), len(header), len(csv_graph))
331
331
  return csv_graph
332
332
 
333
333
  formats = {
@@ -375,7 +375,7 @@ def get_order(setl_graph):
375
375
  return toposort_flatten(nodes)
376
376
 
377
377
  def extract(e, resources):
378
- print 'Extracting',e.identifier
378
+ logger.info('Extracting %s',e.identifier)
379
379
  used = e.value(prov.used)
380
380
  for result in e.subjects(prov.wasGeneratedBy):
381
381
  if used is None:
@@ -383,7 +383,7 @@ def extract(e, resources):
383
383
  for t in result[RDF.type]:
384
384
  # Do we know how to generate this?
385
385
  if t.identifier in extractors:
386
- print "Extracted", used.identifier
386
+ logger.info("Extracted %s", used.identifier)
387
387
  resources[result.identifier] = extractors[t.identifier](used.identifier, result)
388
388
  return resources[result.identifier]
389
389
 
@@ -450,7 +450,7 @@ def process_row(row, template, rowname, table, resources, transform, variables):
450
450
  this = None
451
451
  if isinstance(parent, dict):
452
452
  if len(task) != 2:
453
- print task
453
+ logger.debug(task)
454
454
  key, value = task
455
455
  kt = get_template(key)
456
456
  key = kt.render(**env)
@@ -469,13 +469,12 @@ def process_row(row, template, rowname, table, resources, transform, variables):
469
469
  continue
470
470
  except Exception as e:
471
471
  trace = sys.exc_info()[2]
472
- print "Error in conditional", value['@if']
473
- print "Relevant Environment:"
472
+ logger.error("Error in conditional %s\nRelevant Environment:", value['@if'])
474
473
  for key, v in env.items():
475
474
  if key in value['@if']:
476
475
  if hasattr(v, 'findall'):
477
- v = xml.etree.ElementTree.tostring(v)
478
- print key + "\t" + str(v)[:1000]
476
+ v = xml.etree.ElementTree.tostring(v)
477
+ logger.error(key + "\t" + str(v)[:1000])
479
478
  raise e, None, trace
480
479
  if '@for' in value:
481
480
  f = value['@for']
@@ -504,8 +503,8 @@ def process_row(row, template, rowname, table, resources, transform, variables):
504
503
  pass
505
504
  except Exception as e:
506
505
  trace = sys.exc_info()[2]
507
- print "Error in for:", value['@for']
508
- print "Locals:", env.keys()
506
+ logger.error("Error in @for: %s", value['@for'])
507
+ logger.error("Locals: %s", env.keys())
509
508
  raise e, None, trace
510
509
  continue
511
510
  if '@with' in value:
@@ -534,8 +533,8 @@ def process_row(row, template, rowname, table, resources, transform, variables):
534
533
  pass
535
534
  except Exception as e:
536
535
  trace = sys.exc_info()[2]
537
- print "Error in with:", value['@with']
538
- print "Locals:", env.keys()
536
+ logger.error("Error in with: %s", value['@with'])
537
+ logger.error("Locals: %s", env.keys())
539
538
  raise e, None, trace
540
539
  continue
541
540
  this = {}
@@ -555,13 +554,13 @@ def process_row(row, template, rowname, table, resources, transform, variables):
555
554
  this = template.render(**env)
556
555
  except Exception as e:
557
556
  trace = sys.exc_info()[2]
558
- print "Error in template", value, type(value)
559
- print "Relevant Environment:"
557
+ logger.error("Error in template %s %s", value, type(value))
558
+ logger.error("Relevant Environment:")
560
559
  for key, v in env.items():
561
560
  if key in value:
562
561
  if hasattr(v, 'findall'):
563
562
  v = xml.etree.ElementTree.tostring(v)
564
- print key + "\t" + str(v)[:1000]
563
+ logger.error(key + "\t" + str(v)[:1000])
565
564
  raise e, None, trace
566
565
  else:
567
566
  this = value
@@ -573,7 +572,7 @@ def process_row(row, template, rowname, table, resources, transform, variables):
573
572
  return result
574
573
 
575
574
  def json_transform(transform, resources):
576
- print "Transforming", transform.identifier
575
+ logger.info("Transforming %s", transform.identifier)
577
576
  tables = [u for u in transform[prov.used]]
578
577
  variables = {}
579
578
  for usage in transform[prov.qualifiedUsage]:
@@ -584,7 +583,7 @@ def json_transform(transform, resources):
584
583
  #print "Using", used.identifier, "as", roleID.value
585
584
 
586
585
  generated = list(transform.subjects(prov.wasGeneratedBy))[0]
587
- print "Generating", generated.identifier
586
+ logger.info("Generating %s", generated.identifier)
588
587
 
589
588
  if generated.identifier in resources:
590
589
  result = resources[generated.identifier]
@@ -594,7 +593,7 @@ def json_transform(transform, resources):
594
593
  result = ConjunctiveGraph(store="Sleepycat")
595
594
  if generated[RDF.type : setl.Persisted]:
596
595
  tempdir = tempfile.mkdtemp()
597
- print "Persisting", generated.identifier, "to", tempdir
596
+ logger.info("Persisting %s to %s", generated.identifier, tempdir)
598
597
  result.store.open(tempdir, True)
599
598
  s = transform.value(prov.value).value
600
599
  try:
@@ -602,26 +601,26 @@ def json_transform(transform, resources):
602
601
  except Exception as e:
603
602
  trace = sys.exc_info()[2]
604
603
  if 'No JSON object could be decoded' in e.message:
605
- print s
604
+ logger.error(s)
606
605
  if 'line' in e.message:
607
606
  line = int(re.search("line ([0-9]+)", e.message).group(1))
608
- print "Error in parsing JSON Template at line %d:" % line
609
- print '\n'.join(["%d: %s"%(i+line-3, x) for i, x in enumerate(s.split("\n")[line-3:line+4])])
607
+ logger.error("Error in parsing JSON Template at line %d:", line)
608
+ logger.error('\n'.join(["%d: %s"%(i+line-3, x) for i, x in enumerate(s.split("\n")[line-3:line+4])]))
610
609
  raise e, None, trace
611
610
  context = transform.value(setl.hasContext)
612
611
  if context is not None:
613
612
  context = json.loads(context.value)
614
613
  for t in tables:
615
- print "Using", t.identifier
614
+ logger.info("Using %s", t.identifier)
616
615
  table = resources[t.identifier]
617
616
  it = table
618
617
  if isinstance(table, pandas.DataFrame):
619
618
  #if run_samples:
620
619
  # table = table.head()
621
620
  it = table.iterrows()
622
- print "Transforming", len(table.index), "rows."
621
+ logger.info("Transforming %s rows.", len(table.index))
623
622
  else:
624
- print "Transforming", t.identifier
623
+ logger.info("Transforming %s", t.identifier)
625
624
  for rowname, row in it:
626
625
  if run_samples and rowname >= 100:
627
626
  break
@@ -640,22 +639,20 @@ def json_transform(transform, resources):
640
639
  result.parse(data=data, format="json-ld")
641
640
  del data
642
641
  after = len(result)
643
- sys.stdout.write('\r')
644
- sys.stdout.write("Row "+str(rowname)+" added "+str(after-before)+" triples.")
642
+ logger.debug("Row "+str(rowname)+" added "+str(after-before)+" triples.")
645
643
  sys.stdout.flush()
646
644
  except Exception as e:
647
645
  trace = sys.exc_info()[2]
648
646
  if isinstance(table, pandas.DataFrame):
649
- print "Error on", rowname, row
647
+ logger.error("Error on %s %s", rowname, row)
650
648
  else:
651
- print "Error on", rowname
649
+ logger.error("Error on %s", rowname)
652
650
  raise e, None, trace
653
651
 
654
- print ""
655
652
  resources[generated.identifier] = result
656
653
 
657
654
  def transform(transform_resource, resources):
658
- print 'Transforming',transform_resource.identifier
655
+ logger.info('Transforming %s',transform_resource.identifier)
659
656
 
660
657
  transform_graph = ConjunctiveGraph()
661
658
  for result in transform_graph.subjects(prov.wasGeneratedBy):
@@ -669,26 +666,26 @@ def transform(transform_resource, resources):
669
666
 
670
667
 
671
668
  for script in [u for u in used if u[RDF.type:setl.PythonScript]]:
672
- print "Script:", script.identifier
669
+ logger.info("Script: %s", script.identifier)
673
670
  s = script.value(prov.value).value
674
671
  l = dict(graph = transform_graph, setl_graph = transform_resource.graph)
675
672
  gl = dict()
676
673
  exec(s, gl, l)
677
674
 
678
675
  for jsldt in [u for u in used if u[RDF.type:setl.PythonScript]]:
679
- print "Script:", script.identifier
676
+ logger.info("Script: %s", script.identifier)
680
677
  s = script.value(prov.value).value
681
678
  l = dict(graph = transform_graph, setl_graph = transform_resource.graph)
682
679
  gl = dict()
683
680
  exec(s, gl, l)
684
681
 
685
682
  for update in [u for u in used if u[RDF.type:sp.Update]]:
686
- print "Update:", update.identifier
683
+ logger.info("Update: %s", update.identifier)
687
684
  query = update.value(prov.value).value
688
685
  transform_graph.update(query)
689
686
 
690
687
  for construct in [u for u in used if u[RDF.type:sp.Construct]]:
691
- print "Construct:", construct.identifier
688
+ logger.info("Construct: %s", construct.identifier)
692
689
  query = construct.value(prov.value).value
693
690
  g = transform_graph.query(query)
694
691
  transform_graph += g
@@ -703,7 +700,7 @@ def transform(transform_resource, resources):
703
700
 
704
701
 
705
702
  def load(load_resource, resources):
706
- print 'Loading',load_resource.identifier
703
+ logger.info('Loading %s',load_resource.identifier)
707
704
  file_graph = Dataset(default_union=True)
708
705
  to_disk = False
709
706
  for used in load_resource[prov.used]:
@@ -711,15 +708,15 @@ def load(load_resource, resources):
711
708
  to_disk = True
712
709
  file_graph = Dataset(store='Sleepycat', default_union=True)
713
710
  tempdir = tempfile.mkdtemp()
714
- print "Gathering", load_resource.identifier, "into", tempdir
711
+ logger.debug("Gathering %s into %s", load_resource.identifier, tempdir)
715
712
  file_graph.store.open(tempdir, True)
716
713
  break
717
714
  if len(list(load_resource[prov.used])) == 1:
718
- print "Using",load_resource.value(prov.used).identifier
715
+ logger.info("Using %s",load_resource.value(prov.used).identifier)
719
716
  file_graph = resources[load_resource.value(prov.used).identifier]
720
717
  else:
721
718
  for used in load_resource[prov.used]:
722
- print "Using",used.identifier
719
+ logger.info("Using %s",used.identifier)
723
720
  used_graph = resources[used.identifier]
724
721
  file_graph.namespace_manager = used_graph.namespace_manager
725
722
  #print used_graph.serialize(format="trig")
@@ -757,6 +754,9 @@ actions = {
757
754
  }
758
755
 
759
756
  def _setl(setl_graph):
757
+ global logger
758
+ if logger is None:
759
+ logger = logging.getLogger(__name__)
760
760
  resources = {}
761
761
  resources.update(actions)
762
762
 
@@ -767,14 +767,23 @@ def _setl(setl_graph):
767
767
  if len(action) > 0:
768
768
  action[0](task, resources)
769
769
  return resources
770
+ logger = None
770
771
 
771
772
  def main():
772
773
  args = sys.argv[1:]
774
+ logging_level = logging.DEBUG
775
+ if '-q' in args or '--quiet' in args:
776
+ logging_level = logging.WARNING
777
+ logging.basicConfig(level=logging_level)
778
+
779
+ global logger
780
+ logger = logging.getLogger(__name__)
781
+
773
782
  global run_samples
774
783
  setl_file = args[0]
775
784
  if 'sample' in args:
776
785
  run_samples = True
777
- print "Only processing a few sample rows."
786
+ logger.warning("Only processing a few sample rows.")
778
787
  setl_graph = ConjunctiveGraph()
779
788
  content = open(setl_file).read()
780
789
  setl_graph.parse(data=content, format="turtle")
@@ -782,7 +791,7 @@ def main():
782
791
  graphs = _setl(setl_graph)
783
792
  # print "Finished processing"
784
793
  # return graphs
785
-
794
+
786
795
  if __name__ == '__main__':
787
796
  result = main()
788
- print "Exiting"
797
+ logger.info("Exiting")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 1.1
2
2
  Name: setlr
3
- Version: 0.2.2
3
+ Version: 0.2.4
4
4
  Summary: setlr is a tool for Semantic Extraction, Transformation, and Loading.
5
5
  Home-page: http://packages.python.org/setlr
6
6
  Author: Jim McCusker
@@ -9,6 +9,6 @@ License: Apache License 2.0
9
9
  Description: SETLr is a tool for generating RDF graphs, including named graphs, from almost any kind of tabular data.
10
10
  Keywords: rdf semantic etl
11
11
  Platform: UNKNOWN
12
- Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Development Status :: 5 - Production/Stable
13
13
  Classifier: Topic :: Utilities
14
14
  Classifier: License :: OSI Approved :: Apache Software License
@@ -0,0 +1 @@
1
+ {"is_release": false, "git_version": "a612669"}
usr/local/bin/setlr ADDED
@@ -0,0 +1,10 @@
1
+ #!/usr/bin/python
2
+ # EASY-INSTALL-ENTRY-SCRIPT: 'setlr==0.2.4','console_scripts','setlr'
3
+ __requires__ = 'setlr==0.2.4'
4
+ import sys
5
+ from pkg_resources import load_entry_point
6
+
7
+ if __name__ == '__main__':
8
+ sys.exit(
9
+ load_entry_point('setlr==0.2.4', 'console_scripts', 'setlr')()
10
+ )
@@ -1,14 +0,0 @@
1
- Metadata-Version: 1.1
2
- Name: setlr
3
- Version: 0.2.2
4
- Summary: setlr is a tool for Semantic Extraction, Transformation, and Loading.
5
- Home-page: http://packages.python.org/setlr
6
- Author: Jim McCusker
7
- Author-email: mccusj@cs.rpi.edu
8
- License: Apache License 2.0
9
- Description: SETLr is a tool for generating RDF graphs, including named graphs, from almost any kind of tabular data.
10
- Keywords: rdf semantic etl
11
- Platform: UNKNOWN
12
- Classifier: Development Status :: 3 - Alpha
13
- Classifier: Topic :: Utilities
14
- Classifier: License :: OSI Approved :: Apache Software License
@@ -1 +0,0 @@
1
- {"is_release": false, "git_version": "4b81868"}
setlr-0.2.2/setup.cfg DELETED
@@ -1,11 +0,0 @@
1
- [bdist_wheel]
2
- universal = 1
3
-
4
- [metadata]
5
- description-file = README.md
6
-
7
- [egg_info]
8
- tag_build =
9
- tag_date = 0
10
- tag_svn_revision = 0
11
-
setlr-0.2.2/setup.py DELETED
@@ -1,48 +0,0 @@
1
- import os
2
- from setuptools import setup, find_packages
3
-
4
- # Utility function to read the README file.
5
- # Used for the long_description. It's nice, because now 1) we have a top level
6
- # README file and 2) it's easier to type in the README file than to put a raw
7
- # string in below ...
8
- def read(fname):
9
- return open(os.path.join(os.path.dirname(__file__), fname)).read()
10
-
11
- setup(
12
- name = "setlr",
13
- version = "0.2.2",
14
- author = "Jim McCusker",
15
- author_email = "mccusj@cs.rpi.edu",
16
- description = ("setlr is a tool for Semantic Extraction, Transformation, and Loading."),
17
- license = "Apache License 2.0",
18
- keywords = "rdf semantic etl",
19
- url = "http://packages.python.org/setlr",
20
- packages=['setlr'],
21
- long_description='''SETLr is a tool for generating RDF graphs, including named graphs, from almost any kind of tabular data.''',
22
- include_package_data = True,
23
- install_requires = [
24
- 'pip>=9.0.0',
25
- 'cython',
26
- 'numpy',
27
- 'rdflib',
28
- 'rdflib-jsonld',
29
- 'pandas',
30
- 'requests',
31
- 'toposort',
32
- 'beautifulsoup4',
33
- 'jinja2',
34
- 'lxml',
35
- 'xlrd',
36
- 'ijson',
37
- 'requests-testadapter',
38
- 'python-slugify',
39
- ],
40
- entry_points = {
41
- 'console_scripts': ['setlr=setlr:main'],
42
- },
43
- classifiers=[
44
- "Development Status :: 3 - Alpha",
45
- "Topic :: Utilities",
46
- "License :: OSI Approved :: Apache Software License",
47
- ],
48
- )