PyPI - setlr - Versions diffs - 0.2.13__tar.gz → 0.2.15__tar.gz - Mend

setlr 0.2.13tar.gz → 0.2.15tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

Users/jimmccusker/.pyenv/versions/3.6.9/envs/venv/bin/setlr ADDED Viewed

@@ -0,0 +1,12 @@
+#!/Users/jimmccusker/.pyenv/versions/3.6.9/envs/venv/bin/python
+# EASY-INSTALL-ENTRY-SCRIPT: 'setlr==0.2.15','console_scripts','setlr'
+__requires__ = 'setlr==0.2.15'
+import re
+import sys
+from pkg_resources import load_entry_point
+if __name__ == '__main__':
+    sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0])
+    sys.exit(
+        load_entry_point('setlr==0.2.15', 'console_scripts', 'setlr')()
+    )

{setlr-0.2.13 → Users/jimmccusker/.pyenv/versions/3.6.9/envs/venv/lib/python3.6/site-packages}/setlr/__init__.py RENAMED Viewed

@@ -75,7 +75,7 @@ class LocalFileAdapter(requests.adapters.HTTPAdapter):
 requests_session = requests.session()
 requests_session.mount('file://', LocalFileAdapter())
 requests_session.mount('file:///', LocalFileAdapter())
 datatypeConverters = collections.defaultdict(lambda: str)
 datatypeConverters.update({
     XSD.string: str,
@@ -95,55 +95,20 @@ _rdf_formats_to_guess = [
     'trix'
 ]
-def lru(original_function, maxsize=1000):
-    mapping = {}
-    PREV, NEXT, KEY, VALUE = 0, 1, 2, 3         # link fields
-    head = [None, None, None, None]        # oldest
-    tail = [head, None, None, None]   # newest
-    head[NEXT] = tail
-    def fn(*args, **kw):
-        key = (args,tuple(kw.items()))
-        PREV, NEXT = 0, 1
-        #print "Cache lookup for "+str(key)
-        link = mapping.get(key, head)
-        if link is head:
-            #print "Cache miss for "+str(key)
-            value = original_function(*args,**kw)
-            if len(mapping) >= maxsize:
-                old_prev, old_next, old_key, old_value = head[NEXT]
-                head[NEXT] = old_next
-                old_next[PREV] = head
-                del mapping[old_key]
-            last = tail[PREV]
-            link = [last, tail, key, value]
-            mapping[key] = last[NEXT] = tail[PREV] = link
-        else:
-            #print "Cache hit for "+str(key)
-            link_prev, link_next, key, value = link
-            link_prev[NEXT] = link_next
-            link_next[PREV] = link_prev
-            last = tail[PREV]
-            last[NEXT] = tail[PREV] = link
-            link[PREV] = last
-            link[NEXT] = tail
-        return value
-    return fn
 def read_csv(location, result):
     args = dict(
         sep = result.value(csvw.delimiter, default=Literal(",")).value,
         #header = result.value(csvw.headerRow, default=Literal(0)).value),
         skiprows = result.value(csvw.skipRows, default=Literal(0)).value,
-        dtype = object
+        # dtype = object    # Does not seem to play well with future and python2/3 conversion
     )
     if result.value(csvw.header):
         args['header'] = [0]
-    df = pandas.read_csv(get_content(location, result),encoding='utf-8', **args)
+    df = pandas.read_csv(location,encoding='utf-8', **args)
     logger.debug("Loaded %s", location)
     return df
 def read_graph(location, result, g = None):
     if g is None:
         g = ConjunctiveGraph()
@@ -168,6 +133,8 @@ def read_graph(location, result, g = None):
     return g
 class FileLikeFromIter(object):
+    _closed = False
     def __init__(self, content_iter):
         self.iter = content_iter
         self.data = b''
@@ -175,6 +142,35 @@ class FileLikeFromIter(object):
     def __iter__(self):
         return self.iter
+    def readable(self):
+        return True
+    def writable(self):
+        return False
+    def seekable(self):
+        return False
+    def closed(self):
+        if self._closed:
+            return True
+        if len(self.data) > 0:
+            return False
+        try:
+            self.data = next(self.iter)
+        except StopIteration:
+            self.closed = True
+            return True
+        return False
+    # Enter and Exit are needed to allow this to work with with
+    def __enter__(self):
+        return self
+    # Could be improved for better error/exception handling
+    def __exit__(self, err_type, value, tracebock):
+        pass
     def read(self, n=None):
         if n is None:
             return self.data + b''.join(l for l in self.iter)
@@ -189,7 +185,7 @@ class FileLikeFromIter(object):
 def _open_local_file(location):
     if location.startswith("file://"):
-        if os.name == 'nt': # skip the initial
+        if os.name == 'nt': # skip the initial
             return open(location.replace('file:///','').replace('file://',''),'rb')
         else:
             return open(location.replace('file://',''),'rb')
@@ -198,7 +194,7 @@ content_handlers = [
     _open_local_file,
     lambda location: FileLikeFromIter(requests.get(location,stream=True).iter_content(1024*1024))
 ]
 def get_content(location, result):
     response = None
     for handler in content_handlers:
@@ -207,7 +203,7 @@ def get_content(location, result):
             break
     if result[RDF.type:setl.Tempfile]:
         result = to_tempfile(response)
     for t in result[RDF.type]:
         # Do we know how to unpack this?
         if t.identifier in unpackers:
@@ -235,16 +231,20 @@ unpackers = {
     setl.GZipFile : lambda f: gzip.GzipFile(fileobj=f,mode='r')
 }
+packers = {
+#    setl.Tempfile : lambda x: x,
+    setl.GZipFile : lambda f: gzip.GzipFile(fileobj=f,mode='wb')
+}
 def read_excel(location, result):
     args = dict(
-        sheetname = result.value(setl.sheetname, default=Literal(0)).value,
+        sheet_name = result.value(setl.sheetname, default=Literal(0)).value,
         header = [int(x) for x in result.value(csvw.headerRow, default=Literal('0')).value.split(',')],
         skiprows = result.value(csvw.skipRows, default=Literal(0)).value
     )
     if result.value(csvw.header):
         args['header'] = [result.value(csvw.header).value]
-    with get_content(location, result) as f:
-        df = pandas.read_excel(f,encoding='utf-8', **args)
+    df = pandas.read_excel(location,encoding='utf-8', **args)
     return df
 def read_xml(location, result):
@@ -261,7 +261,7 @@ def read_xml(location, result):
         for (i, (event, ele)) in enumerate(f.iterparse(fo)):
             yield i, ele
 def read_json(location, result):
     selector = result.value(api_vocab.selector)
     if selector is not None:
@@ -269,8 +269,8 @@ def read_json(location, result):
     else:
         selector = ""
     with get_content(location, result) as fo:
-        return enumerate(ijson.items(fo, selector))
+        yield from enumerate(ijson.items(fo, selector))
 extractors = {
     setl.XPORT : lambda location, result: pandas.read_sas(get_content(location, result), format='xport'),
@@ -284,14 +284,14 @@ extractors = {
     URIRef("https://www.iana.org/assignments/media-types/text/plain") : lambda location, result: get_content(location, result)
 }
 try:
     from bs4 import BeautifulSoup
     extractors[setl.HTML] = lambda location, result: BeautifulSoup(get_content(location, result).read(), 'html.parser')
 except Exception as e:
     pass
 def load_csv(csv_resource):
     column_descriptions = {}
     for col in csv_resource[csvw.column]:
@@ -360,7 +360,7 @@ def create_python_function(f, resources):
         local_vars[name.value] = entity
     exec(script.value, local_vars, global_vars)
     resources[f.identifier] = global_vars['result']
 def get_order(setl_graph):
     nodes = collections.defaultdict(set)
@@ -378,7 +378,7 @@ def get_order(setl_graph):
             for derivation in task[prov.qualifiedDerivation]:
                 derived = derivation.value(prov.entity)
                 nodes[task.identifier].add(derived.identifier)
     return toposort_flatten(nodes)
 def extract(e, resources):
@@ -415,7 +415,7 @@ def get_function(expr, local_keys):
     if key not in functions:
         script = '''lambda %s: %s'''% (', '.join(sorted(local_keys)), expr)
         fn = eval(script)
-        fn.__name__ = str(expr)
+        fn.__name__ = expr.encode("ascii", "ignore").decode('utf8')
         functions[key] = fn
     return functions[key]
@@ -425,7 +425,7 @@ def get_template(templ):
         t = Template(templ)
         templates[templ] = t
     return templates[templ]
 def process_row(row, template, rowname, table, resources, transform, variables):
     result = []
     e = {'row':row,
@@ -588,7 +588,7 @@ def json_transform(transform, resources):
         roleID  = role.value(dc.identifier)
         variables[roleID.value] = resources[used.identifier]
         #print "Using", used.identifier, "as", roleID.value
     generated = list(transform.subjects(prov.wasGeneratedBy))[0]
     logger.info("Generating %s", generated.identifier)
@@ -632,6 +632,8 @@ def json_transform(transform, resources):
             if run_samples and rowname >= 100:
                 break
             try:
+                root = None
+                data = None
                 root = {
                     "@id": generated.identifier,
                     "@graph": process_row(row, jslt, rowname, table, resources, transform, variables)
@@ -642,20 +644,22 @@ def json_transform(transform, resources):
                 #graph = ConjunctiveGraph(identifier=generated.identifier)
                 #graph.parse(data=json.dumps(root),format="json-ld")
                 data = json.dumps(root)
-                del root
+                #del root
                 result.parse(data=data, format="json-ld")
-                del data
+                #del data
                 after = len(result)
                 logger.debug("Row "+str(rowname)+" added "+str(after-before)+" triples.")
                 sys.stdout.flush()
             except Exception as e:
                 trace = sys.exc_info()[2]
+                if data is not None:
+                    logger.error("Error parsing tree: %s", data)
                 if isinstance(table, pandas.DataFrame):
                     logger.error("Error on %s %s", rowname, row)
                 else:
                     logger.error("Error on %s", rowname)
                 raise e
     resources[generated.identifier] = result
 def transform(transform_resource, resources):
@@ -666,12 +670,12 @@ def transform(transform_resource, resources):
         transform_graph = ConjunctiveGraph(identifier=result.identifier)
     used = set(transform_resource[prov.used])
     for csv in [u for u in used if u[RDF.type:csvw.Table]]:
         csv_graph = Graph(store=transform_graph.store, identifier=csv)
         csv_graph += graphs[csv.identifier]
     for script in [u for u in used if u[RDF.type:setl.PythonScript]]:
         logger.info("Script: %s", script.identifier)
         s = script.value(prov.value).value
@@ -690,22 +694,29 @@ def transform(transform_resource, resources):
         logger.info("Update: %s", update.identifier)
         query = update.value(prov.value).value
         transform_graph.update(query)
     for construct in [u for u in used if u[RDF.type:sp.Construct]]:
         logger.info("Construct: %s", construct.identifier)
         query = construct.value(prov.value).value
         g = transform_graph.query(query)
         transform_graph += g
     for csv in [u for u in used if u[RDF.type:csvw.Table]]:
         g = Graph(identifier=csv.identifier,store=transform_graph.store)
         g.remove((None, None, None))
         transform_graph.store.remove_graph(csv.identifier)
     for result in transform_graph.subjects(prov.wasGeneratedBy):
         graphs[result.identifier] = transform_graph
+def _load_open(generated):
+    filename = generated.identifier.replace("file://",'')
+    fh = open(filename, 'wb')
+    for type, pack in packers.items():
+        if generated[RDF.type : type]:
+            return pack(fh)
+    return fh
 def load(load_resource, resources):
     logger.info('Loading %s',load_resource.identifier)
     file_graph = Dataset(default_union=True)
@@ -738,7 +749,7 @@ def load(load_resource, resources):
             if fmt in formats:
                 fmt = formats[fmt]
                 #print fmt
-            with open(generated.identifier.replace("file://",''), 'wb') as o:
+            with _load_open(generated) as o:
                 o.write(file_graph.serialize(format=fmt))
                 o.close()
         elif generated[RDF.type:sd.Service]:
@@ -750,8 +761,8 @@ def load(load_resource, resources):
             endpoint_graph.commit()
     #if to_disk:
     #    file_graph.close()
 actions = {
     setl.Extract : extract,
     setl.Transform : json_transform,
@@ -759,7 +770,7 @@ actions = {
     setl.PythonScript : create_python_function,
     setl.IsEmpty : isempty
 }
 def _setl(setl_graph):
     global logger
     if logger is None:
@@ -785,7 +796,7 @@ def main():
     global logger
     logger = logging.getLogger(__name__)
     global run_samples
     setl_file = args[0]
     if 'sample' in args:

Users/jimmccusker/.pyenv/versions/3.6.9/envs/venv/lib/python3.6/site-packages/setlr/__pycache__/__init__.cpython-36.pyc ADDED Viewed

Binary file

Users/jimmccusker/.pyenv/versions/3.6.9/envs/venv/lib/python3.6/site-packages/setlr/__pycache__/iterparse_filter.cpython-36.pyc ADDED Viewed

Binary file

Users/jimmccusker/.pyenv/versions/3.6.9/envs/venv/lib/python3.6/site-packages/setlr/__pycache__/sqlite-store.cpython-36.pyc ADDED Viewed

Binary file

Users/jimmccusker/.pyenv/versions/3.6.9/envs/venv/lib/python3.6/site-packages/setlr/sqlite-store.py ADDED Viewed

File without changes

{setlr-0.2.13 → Users/jimmccusker/.pyenv/versions/3.6.9/envs/venv/lib/python3.6/site-packages/setlr-0.2.15-py3.6.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 1.1
 Name: setlr
-Version: 0.2.13
+Version: 0.2.15
 Summary: setlr is a tool for Semantic Extraction, Transformation, and Loading.
 Home-page: http://packages.python.org/setlr
 Author: Jim McCusker

{setlr-0.2.13/setlr.egg-info → Users/jimmccusker/.pyenv/versions/3.6.9/envs/venv/lib/python3.6/site-packages/setlr-0.2.15-py3.6.egg-info}/SOURCES.txt RENAMED Viewed

@@ -3,6 +3,7 @@ setup.cfg
 setup.py
 setlr/__init__.py
 setlr/iterparse_filter.py
+setlr/sqlite-store.py
 setlr.egg-info/PKG-INFO
 setlr.egg-info/SOURCES.txt
 setlr.egg-info/dependency_links.txt

{setlr-0.2.13/setlr.egg-info → Users/jimmccusker/.pyenv/versions/3.6.9/envs/venv/lib/python3.6/site-packages/setlr-0.2.15-py3.6.egg-info}/requires.txt RENAMED Viewed

@@ -4,7 +4,7 @@ cython
 numpy
 rdflib
 rdflib-jsonld
-pandas==0.22.0
+pandas>=0.23.0
 requests
 toposort
 beautifulsoup4

setlr-0.2.13/README.md DELETED Viewed

@@ -1,15 +0,0 @@
-# setlr: The Semantic Extract, Transform and Load-er
-setlr is a tool for generating RDF graphs, including named graphs, from almost any kind of tabular data.
-# Installation
-Simply check out the code, optionally create a python virtual environment, and install it using pip:
-```bash
-pip install setlr
-```
-# Learning how to SETL
-To learn how to use setlr please visit [the tutorial wiki page](https://github.com/tetherless-world/setlr/wiki/SETLr-Basics-Tutorial).

setlr-0.2.13/setlr.egg-info/PKG-INFO DELETED Viewed

@@ -1,14 +0,0 @@
-Metadata-Version: 1.1
-Name: setlr
-Version: 0.2.13
-Summary: setlr is a tool for Semantic Extraction, Transformation, and Loading.
-Home-page: http://packages.python.org/setlr
-Author: Jim McCusker
-Author-email: mccusj@cs.rpi.edu
-License: Apache License 2.0
-Description: SETLr is a tool for generating RDF graphs, including named graphs, from almost any kind of tabular data.
-Keywords: rdf semantic etl
-Platform: UNKNOWN
-Classifier: Development Status :: 5 - Production/Stable
-Classifier: Topic :: Utilities
-Classifier: License :: OSI Approved :: Apache Software License

setlr-0.2.13/setup.cfg DELETED Viewed

@@ -1,10 +0,0 @@
-[bdist_wheel]
-universal = 1
-[metadata]
-description-file = README.md
-[egg_info]
-tag_build =
-tag_date = 0

setlr-0.2.13/setup.py DELETED Viewed

@@ -1,50 +0,0 @@
-import os
-from setuptools import setup, find_packages
-# Utility function to read the README file.
-# Used for the long_description.  It's nice, because now 1) we have a top level
-# README file and 2) it's easier to type in the README file than to put a raw
-# string in below ...
-def read(fname):
-    return open(os.path.join(os.path.dirname(__file__), fname)).read()
-setup(
-    name = "setlr",
-    version = "0.2.13",
-    author = "Jim McCusker",
-    author_email = "mccusj@cs.rpi.edu",
-    description = ("setlr is a tool for Semantic Extraction, Transformation, and Loading."),
-    license = "Apache License 2.0",
-    keywords = "rdf semantic etl",
-    url = "http://packages.python.org/setlr",
-    packages=['setlr'],
-    long_description='''SETLr is a tool for generating RDF graphs, including named graphs, from almost any kind of tabular data.''',
-    include_package_data = True,
-    install_requires = [
-        'future',
-        'pip>=9.0.0',
-        'cython',
-        'numpy',
-        'rdflib',
-        'rdflib-jsonld',
-        'pandas==0.22.0',
-        'requests',
-        'toposort',
-        'beautifulsoup4',
-        'jinja2',
-        'lxml',
-        'six',
-        'xlrd',
-        'ijson',
-        'requests-testadapter',
-        'python-slugify',
-    ],
-    entry_points = {
-        'console_scripts': ['setlr=setlr:main'],
-    },
-    classifiers=[
-        "Development Status :: 5 - Production/Stable",
-        "Topic :: Utilities",
-        "License :: OSI Approved :: Apache Software License",
-    ],
-)

{setlr-0.2.13 → Users/jimmccusker/.pyenv/versions/3.6.9/envs/venv/lib/python3.6/site-packages}/setlr/iterparse_filter.py RENAMED Viewed

File without changes

{setlr-0.2.13/setlr.egg-info → Users/jimmccusker/.pyenv/versions/3.6.9/envs/venv/lib/python3.6/site-packages/setlr-0.2.15-py3.6.egg-info}/dependency_links.txt RENAMED Viewed

File without changes

{setlr-0.2.13/setlr.egg-info → Users/jimmccusker/.pyenv/versions/3.6.9/envs/venv/lib/python3.6/site-packages/setlr-0.2.15-py3.6.egg-info}/entry_points.txt RENAMED Viewed

File without changes

{setlr-0.2.13/setlr.egg-info → Users/jimmccusker/.pyenv/versions/3.6.9/envs/venv/lib/python3.6/site-packages/setlr-0.2.15-py3.6.egg-info}/pbr.json RENAMED Viewed

File without changes

{setlr-0.2.13/setlr.egg-info → Users/jimmccusker/.pyenv/versions/3.6.9/envs/venv/lib/python3.6/site-packages/setlr-0.2.15-py3.6.egg-info}/top_level.txt RENAMED Viewed

File without changes

setlr 0.2.13__tar.gz → 0.2.15__tar.gz

setlr 0.2.13tar.gz → 0.2.15tar.gz