graffl 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- graffl/__init__.py +11 -0
- graffl/cli.py +103 -0
- graffl/config.py +39 -0
- graffl/parser.py +197 -0
- graffl-0.2.0.dist-info/METADATA +23 -0
- graffl-0.2.0.dist-info/RECORD +10 -0
- graffl-0.2.0.dist-info/WHEEL +5 -0
- graffl-0.2.0.dist-info/entry_points.txt +2 -0
- graffl-0.2.0.dist-info/licenses/LICENSE +21 -0
- graffl-0.2.0.dist-info/top_level.txt +1 -0
graffl/__init__.py
ADDED
graffl/cli.py
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import logging
|
|
3
|
+
import sys
|
|
4
|
+
from importlib import metadata
|
|
5
|
+
|
|
6
|
+
from rdflib import Graph
|
|
7
|
+
|
|
8
|
+
# Import the global config object and our parser
|
|
9
|
+
from .config import CONFIG
|
|
10
|
+
from .parser import GrafflParser
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _build_arg_parser(version):
    """Build the argparse parser for the graffl command line.

    ``version`` is the text reported by ``-V`` / ``--version``.
    """
    arg_parser = argparse.ArgumentParser(
        description="Graffl RDF Parser CLI",
        epilog="Parses a .graffl file and outputs RDF."
    )
    arg_parser.add_argument(
        "-V", "--version",
        action="version",
        version=f"%(prog)s {version}",
        help="Print the program version and exit."
    )
    arg_parser.add_argument(
        "input_file",
        help="Path to the input .graffl file"
    )
    arg_parser.add_argument(
        "-c", "--config",
        help="Path to an optional config.json file",
        default=None
    )
    arg_parser.add_argument(
        "-o", "--output",
        help="Path to an optional output Turtle file",
        default=None
    )
    arg_parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="Enable verbose debug logging"
    )
    return arg_parser


def main(argv=None):
    """Parse a .graffl file and emit the resulting graph as Turtle.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). ``None`` lets argparse read ``sys.argv[1:]``, which is
            what the console entry point relies on.

    The process exits with status 1 when the input file is missing, when
    parsing fails, or when the output file cannot be written.
    """
    # 1. Set up argparse. The version comes from the installed distribution
    #    metadata; a source checkout that is not installed has none.
    try:
        version = metadata.version("graffl")
    except metadata.PackageNotFoundError:
        version = "unknown (not installed)"

    args = _build_arg_parser(version).parse_args(argv)

    # 2. Configure logging.
    # With -v the level is DEBUG; otherwise only warnings and errors are shown.
    log_level = logging.DEBUG if args.verbose else logging.WARNING
    logging.basicConfig(
        level=log_level,
        format="%(levelname)s: %(message)s"
    )
    logger = logging.getLogger(__name__)

    # 3. Load configuration (only if provided via -c).
    if args.config:
        logger.debug("Loading configuration from %s", args.config)
        CONFIG.load_from_json(args.config)

    # 4. Parse the file.
    logger.debug("Parsing file: %s", args.input_file)
    g = Graph()

    try:
        # Read the whole file and hand the text to rdflib as string data.
        with open(args.input_file, 'r', encoding='utf-8') as f:
            data = f.read()

        # NOTE(review): plugin_parsers is passed through unchanged; presumably
        # rdflib forwards it to the parser as an extra keyword argument --
        # confirm it is still needed.
        g.parse(data=data, format="graffl", plugin_parsers={"graffl": GrafflParser})
    except FileNotFoundError:
        logger.error("Input file not found: %s", args.input_file)
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report any parsing problem without a traceback.
        logger.error("An error occurred during parsing: %s", e)
        sys.exit(1)

    # 5. Output: write to the requested file, or print Turtle to stdout.
    if args.output:
        logger.debug("Writing output to %s", args.output)
        try:
            g.serialize(format="turtle", destination=args.output)
        except OSError as e:
            # An unwritable destination is a user-facing error, not a crash.
            logger.error("Could not write output file %s: %s", args.output, e)
            sys.exit(1)
    else:
        output = g.serialize(format="turtle")
        print(output)


if __name__ == "__main__":
    main()
|
graffl/config.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
import logging
|
|
4
|
+
|
|
5
|
+
logger = logging.getLogger(__name__)


class GrafflConfig:
    """Settings shared by the graffl parser, with built-in defaults.

    A single module-level instance (``CONFIG``) is created below; values can
    be overridden from a JSON file with :meth:`load_from_json`.
    """

    def __init__(self):
        # Default fallback value: prefix put in front of names that are not
        # written as full URIs.
        self.uri_prefix = "https://www.hedenus.de/graffl/"
        # Shorthand names that resolve to a fixed URI instead of prefix + name.
        self.dictionary = {
            ":": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type",
            "a": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type",
        }
        # Property predicates whose objects become URIs rather than literals.
        self.uri_properties = {
            "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
        }
        # Predicate linking an inner graph (group) to the subjects inside it.
        self.group_contains = "https://www.hedenus.de/graffl/contains"
        # rdf:type assigned to every inner graph (group).
        self.group_type = "https://www.hedenus.de/graffl/Group"

    def load_from_json(self, filepath: str) -> None:
        """Load settings from a JSON file and override the defaults.

        Only the ``uri_prefix`` key is read. Problems with the file are
        reported through the module logger and leave the current settings
        untouched; they never raise for a missing file, malformed JSON, a
        non-object document or a non-string ``uri_prefix``.

        Args:
            filepath: Path to the JSON configuration file.
        """
        if not os.path.exists(filepath):
            # The caller asked for this file explicitly, so make the fallback
            # visible at the default log level.
            logger.warning("Config file '%s' not found. Using defaults.", filepath)
            return

        with open(filepath, 'r', encoding='utf-8') as f:
            try:
                data = json.load(f)
            except (json.JSONDecodeError, UnicodeDecodeError):
                logger.error("Error: '%s' is not a valid JSON file.", filepath)
                return

        if not isinstance(data, dict):
            # e.g. a bare number or list: there are no keys to read.
            logger.error("Error: '%s' does not contain a JSON object.", filepath)
            return

        if "uri_prefix" in data:
            prefix = data["uri_prefix"]
            if isinstance(prefix, str):
                self.uri_prefix = prefix
                logger.debug("Loaded URI prefix from config: %s", self.uri_prefix)
            else:
                logger.warning(
                    "Ignoring 'uri_prefix' in '%s': expected a string.", filepath
                )


CONFIG = GrafflConfig()
|
graffl/parser.py
ADDED
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import logging
|
|
3
|
+
from urllib.parse import quote
|
|
4
|
+
from rdflib import URIRef, Literal, Graph, RDFS, RDF
|
|
5
|
+
from rdflib.parser import Parser
|
|
6
|
+
from lark import Lark, Token
|
|
7
|
+
from lark.visitors import Interpreter
|
|
8
|
+
|
|
9
|
+
# Importiere unser Konfigurationsobjekt
|
|
10
|
+
from .config import CONFIG
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), 'graffl.lark')
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class GrafflASTInterpreter(Interpreter):
    """
    Walk the abstract syntax tree (AST) strictly top-down and emit triples.

    The top-down traversal lets the interpreter enter scopes such as inner
    graphs and leave them again cleanly.
    """

    def __init__(self, sink):
        """Create an interpreter that adds every triple to ``sink`` via ``sink.add``."""
        self.sink = sink

        # --- State ---
        # Raw subject text -> subject URI, used to label each subject once.
        self.entities = {}
        # URI of the inner graph being visited, or None outside of one.
        self.current_inner_graph = None
        # Subjects already linked to the current inner graph (None outside).
        self.current_entities_in_inner_graph = None

        self.current_subject = None
        self.current_predicate = None
        self.current_predicate_type = None  # 'property' or 'relation'

        # Copies of the global configuration, so a script directive can
        # override values without touching CONFIG itself.
        self.current_uri_prefix = CONFIG.uri_prefix
        self.dictionary = dict(CONFIG.dictionary)
        self.uri_properties = {URIRef(i) for i in CONFIG.uri_properties}

        self.group_contains = URIRef(CONFIG.group_contains)
        self.group_type = URIRef(CONFIG.group_type)

    # ==========================================
    # Helper methods for RDF nodes
    # ==========================================

    def _is_uri(self, val):
        """Return True when ``val`` is written as ``<...>``."""
        return val.startswith('<') and val.endswith('>')

    def _get_raw_value(self, node):
        """Return the text of ``node``: its first child if it has children, else itself."""
        if not getattr(node, 'children', None):
            return str(node)
        token = node.children[0]
        return token.value if isinstance(token, Token) else str(token)

    def _clean_string(self, val):
        """Strip one pair of surrounding triple or single double-quotes, if present."""
        if val.startswith('"""') and val.endswith('"""'):
            return val[3:-3]
        elif val.startswith('"') and val.endswith('"'):
            return val[1:-1]
        return val

    def _make_uri(self, val):
        """Turn a raw value into a URIRef.

        ``<...>`` values are used verbatim; dictionary entries map to their
        configured URI; everything else is URL-encoded and appended to the
        current URI prefix.
        """
        if self._is_uri(val):
            return URIRef(val[1:-1])

        clean_val = self._clean_string(val)

        if clean_val in self.dictionary:
            return URIRef(self.dictionary[clean_val])

        # Append to the prefix and URL-encode (safe="/:=#" keeps intended separators).
        encoded_val = quote(clean_val, safe="/:=#")
        return URIRef(f"{self.current_uri_prefix}{encoded_val}")

    # ==========================================
    # Interpreter methods (top-down logic)
    # ==========================================

    def directive(self, tree):
        """Handle directives such as '@ prefix ...' and override the state."""
        if len(tree.children) < 2:
            return

        command = self._get_raw_value(tree.children[0]).lower()
        if command != "prefix":
            return

        new_prefix = self._get_raw_value(tree.children[1])

        # Strip surrounding angle brackets or quotation marks.
        if new_prefix.startswith('<') and new_prefix.endswith('>'):
            new_prefix = new_prefix[1:-1]
        elif new_prefix.startswith('"') and new_prefix.endswith('"'):
            new_prefix = new_prefix[1:-1]

        self.current_uri_prefix = new_prefix
        logger.debug("URI prefix overridden by script: %s", self.current_uri_prefix)

    def inner_graph(self, tree):
        """Enter an inner graph, visit its children, then restore the outer scope."""
        graph_name_str = self._clean_string(self._get_raw_value(tree.children[0]))
        logger.debug("Entering inner graph: %s", graph_name_str)

        # Remember the enclosing scope so it is restored on exit, even when a
        # child visit raises.
        outer_graph = self.current_inner_graph
        outer_entities = self.current_entities_in_inner_graph

        self.current_inner_graph = self._make_uri(graph_name_str)
        self.current_entities_in_inner_graph = set()
        try:
            self.add_triple((self.current_inner_graph, RDF.type, self.group_type))
            self.add_triple((self.current_inner_graph, RDFS.label, Literal(graph_name_str)))
            self.visit_children(tree)
        finally:
            self.current_inner_graph = outer_graph
            self.current_entities_in_inner_graph = outer_entities

        logger.debug("Leaving inner graph: %s", graph_name_str)

    def block(self, tree):
        """Reset subject/predicate state, then visit the block's children."""
        self.current_subject = None
        self.current_predicate = None
        self.current_predicate_type = None
        self.visit_children(tree)

    def subject(self, tree):
        """Set the current subject; label it once and link it to the inner graph."""
        val = self._get_raw_value(tree)
        subject = self._make_uri(val)

        if val not in self.entities:
            self.add_triple((subject, RDFS.label, Literal(self._clean_string(val))))
            # Record the subject itself (not the previously active subject).
            self.entities[val] = subject

        if self.current_inner_graph is not None:
            members = self.current_entities_in_inner_graph
            if subject not in members:
                self.add_triple((self.current_inner_graph, self.group_contains, subject))
                # Track membership so the link is emitted once per inner graph.
                members.add(subject)

        self.current_subject = subject

    def predicate_property(self, tree):
        """Set the current predicate and mark it as a property."""
        val = self._get_raw_value(tree)
        self.current_predicate = self._make_uri(val)
        self.current_predicate_type = 'property'

    def predicate_relation(self, tree):
        """Set the current predicate and mark it as a relation."""
        val = self._get_raw_value(tree.children[0])
        self.current_predicate = self._make_uri(val)
        self.current_predicate_type = 'relation'

    def object(self, tree):
        """Emit a triple for the current subject and predicate with this object."""
        val = self._get_raw_value(tree)

        if self.current_predicate_type == 'property':
            # A property always yields a literal, unless the value is an
            # explicit URI or the predicate is configured as a URI property.
            if self._is_uri(val) or (self.current_predicate in self.uri_properties):
                obj = self._make_uri(val)
            else:
                obj = Literal(self._clean_string(val))
        elif self.current_predicate_type == 'relation':
            # A relation always yields a URI.
            obj = self._make_uri(val)
        else:
            return

        # Compare against None instead of relying on truthiness: an empty
        # string literal is falsy and would otherwise be dropped silently.
        if self.current_subject is not None and self.current_predicate is not None:
            self.add_triple((self.current_subject, self.current_predicate, obj))

    def add_triple(self, triple):
        """Log the triple at debug level and add it to the sink."""
        logger.debug("%s %s %s", triple[0], triple[1], triple[2])
        self.sink.add(triple)
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
class GrafflParser(Parser):
    """
    Custom Parser for RDF-format 'graffl'.

    Reads the Lark grammar from GRAMMAR_FILE on construction and turns the
    parsed tree into triples with GrafflASTInterpreter.
    """

    def __init__(self):
        super().__init__()

        with open(GRAMMAR_FILE, 'r', encoding='utf-8') as f:
            grammar_text = f.read()

        # Use the fast 'lalr' parser.
        self.lark_parser = Lark(grammar_text, start='start', parser='lalr')

    @staticmethod
    def _read_content(source):
        """Return the full text of ``source``.

        The character stream is preferred. If the source only offers a byte
        stream, it is decoded with the source's declared encoding, or UTF-8
        when none is declared.

        Raises:
            ValueError: If the source provides neither kind of stream.
        """
        stream = source.getCharacterStream()
        if stream is not None:
            return stream.read()

        byte_stream = source.getByteStream()
        if byte_stream is None:
            raise ValueError("No character stream available.")

        raw = byte_stream.read()
        if isinstance(raw, bytes):
            return raw.decode(source.getEncoding() or "utf-8")
        return raw

    def parse(self, source, sink, **kwargs):
        """Parse graffl text from ``source`` and add the resulting triples to ``sink``.

        Extra keyword arguments are accepted and ignored.

        Raises:
            ValueError: If ``source`` provides no readable stream.
        """
        content = self._read_content(source)

        tree = self.lark_parser.parse(content)

        # Rendering the tree is only worth the cost when it will be logged.
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug(
                "--- AST STRUCTURE ---\n%s\n---------------------", tree.pretty()
            )

        interpreter = GrafflASTInterpreter(sink)
        interpreter.visit(tree)
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: graffl
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: RDF scratch file.
|
|
5
|
+
Author-email: Michael Hedenus <michael@hedenus.de>
|
|
6
|
+
Project-URL: Homepage, https://github.com/mhedenus/graffl
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: Operating System :: OS Independent
|
|
9
|
+
Requires-Python: >=3.9
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Requires-Dist: rdflib>=7.0.0
|
|
13
|
+
Requires-Dist: lark>=1.3.1
|
|
14
|
+
Provides-Extra: dev
|
|
15
|
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
|
16
|
+
Requires-Dist: pytest-cov>=7.1.0; extra == "dev"
|
|
17
|
+
Dynamic: license-file
|
|
18
|
+
|
|
19
|
+
# Graffl
|
|
20
|
+
|
|
21
|
+
An RDF scratch pad file format.
|
|
22
|
+
|
|
23
|
+
__Note__: *Experimental!*
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
graffl/__init__.py,sha256=fhq_DoZCDwsGBGUgl7RqFY5MjVC8weVSPCRn6Pox8X0,214
|
|
2
|
+
graffl/cli.py,sha256=eW-2UCMpn4PczKlH0qc6fRJI5bZyzwRhNOdkJfk61jA,2774
|
|
3
|
+
graffl/config.py,sha256=lc6vni0qIq_CDycrZ5sWAFM6B3XNuKwe9zLFCv8YAZQ,1368
|
|
4
|
+
graffl/parser.py,sha256=y53PX-XyunAj70RVgfQeH7uhDlu73SQXNU2kMuzKfyo,6990
|
|
5
|
+
graffl-0.2.0.dist-info/licenses/LICENSE,sha256=jySn7dhUlfFURZL956gl0-LTj5s9mWYfh_LQolNYAzA,1093
|
|
6
|
+
graffl-0.2.0.dist-info/METADATA,sha256=XGKlzNiwx9YXq1rTD3JPxhPVFvVBu3_WOfbN082WFCk,650
|
|
7
|
+
graffl-0.2.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
8
|
+
graffl-0.2.0.dist-info/entry_points.txt,sha256=7Zy7kS2nl1Tk-RJL7x6TIN1IWrvdary_1GwVSAlTjis,43
|
|
9
|
+
graffl-0.2.0.dist-info/top_level.txt,sha256=kS7yxPmBUyt36rx-KPKnsGNYPH63l4bHU5-yHjRpQ8U,7
|
|
10
|
+
graffl-0.2.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Michael Hedenus
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
graffl
|