glinker 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- glinker/__init__.py +54 -0
- glinker/core/__init__.py +56 -0
- glinker/core/base.py +103 -0
- glinker/core/builders.py +547 -0
- glinker/core/dag.py +898 -0
- glinker/core/factory.py +261 -0
- glinker/core/registry.py +31 -0
- glinker/l0/__init__.py +21 -0
- glinker/l0/component.py +472 -0
- glinker/l0/models.py +90 -0
- glinker/l0/processor.py +108 -0
- glinker/l1/__init__.py +15 -0
- glinker/l1/component.py +284 -0
- glinker/l1/models.py +47 -0
- glinker/l1/processor.py +152 -0
- glinker/l2/__init__.py +19 -0
- glinker/l2/component.py +1220 -0
- glinker/l2/models.py +99 -0
- glinker/l2/processor.py +170 -0
- glinker/l3/__init__.py +12 -0
- glinker/l3/component.py +184 -0
- glinker/l3/models.py +48 -0
- glinker/l3/processor.py +350 -0
- glinker/l4/__init__.py +9 -0
- glinker/l4/component.py +121 -0
- glinker/l4/models.py +21 -0
- glinker/l4/processor.py +156 -0
- glinker/py.typed +1 -0
- glinker-0.1.0.dist-info/METADATA +994 -0
- glinker-0.1.0.dist-info/RECORD +33 -0
- glinker-0.1.0.dist-info/WHEEL +5 -0
- glinker-0.1.0.dist-info/licenses/LICENSE +201 -0
- glinker-0.1.0.dist-info/top_level.txt +1 -0
glinker/__init__.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""
|
|
2
|
+
GLiNKER - Entity Linking Framework
|
|
3
|
+
A modular 4-layer entity linking pipeline using spaCy NER, database search, and GLiNER.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
__version__ = "0.1.0"
|
|
7
|
+
|
|
8
|
+
from glinker.l0 import processor as _l0_processor
|
|
9
|
+
from glinker.l1 import processor as _l1_processor
|
|
10
|
+
from glinker.l2 import processor as _l2_processor
|
|
11
|
+
from glinker.l3 import processor as _l3_processor
|
|
12
|
+
from glinker.l4 import processor as _l4_processor
|
|
13
|
+
|
|
14
|
+
from glinker.core import (
|
|
15
|
+
BaseConfig,
|
|
16
|
+
BaseInput,
|
|
17
|
+
BaseOutput,
|
|
18
|
+
BaseComponent,
|
|
19
|
+
BaseProcessor,
|
|
20
|
+
ProcessorRegistry,
|
|
21
|
+
processor_registry,
|
|
22
|
+
ProcessorFactory,
|
|
23
|
+
load_yaml,
|
|
24
|
+
InputConfig,
|
|
25
|
+
OutputConfig,
|
|
26
|
+
ReshapeConfig,
|
|
27
|
+
PipeNode,
|
|
28
|
+
PipeContext,
|
|
29
|
+
FieldResolver,
|
|
30
|
+
DAGPipeline,
|
|
31
|
+
DAGExecutor,
|
|
32
|
+
ConfigBuilder,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
__all__ = [
|
|
36
|
+
'BaseConfig',
|
|
37
|
+
'BaseInput',
|
|
38
|
+
'BaseOutput',
|
|
39
|
+
'BaseComponent',
|
|
40
|
+
'BaseProcessor',
|
|
41
|
+
'ProcessorRegistry',
|
|
42
|
+
'processor_registry',
|
|
43
|
+
'ProcessorFactory',
|
|
44
|
+
'load_yaml',
|
|
45
|
+
'InputConfig',
|
|
46
|
+
'OutputConfig',
|
|
47
|
+
'ReshapeConfig',
|
|
48
|
+
'PipeNode',
|
|
49
|
+
'PipeContext',
|
|
50
|
+
'FieldResolver',
|
|
51
|
+
'DAGPipeline',
|
|
52
|
+
'DAGExecutor',
|
|
53
|
+
'ConfigBuilder',
|
|
54
|
+
]
|
glinker/core/__init__.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# Base models, abstract base classes and the type variables they are
# parameterized with.
from .base import (
    BaseConfig,
    BaseInput,
    BaseOutput,
    BaseComponent,
    BaseProcessor,
    ConfigT,
    InputT,
    OutputT
)
# Registry class and the module-level `processor_registry` object.
from .registry import (
    ProcessorRegistry,
    processor_registry
)
from .factory import ProcessorFactory, load_yaml

# DAG pipeline building blocks.
from .dag import (
    InputConfig,
    OutputConfig,
    ReshapeConfig,
    PipeNode,
    PipeContext,
    FieldResolver,
    DAGPipeline,
    DAGExecutor
)

from .builders import ConfigBuilder

# Public names of glinker.core, grouped by the submodule they come from.
__all__ = [
    # .base
    'BaseConfig',
    'BaseInput',
    'BaseOutput',
    'BaseComponent',
    'BaseProcessor',
    'ConfigT',
    'InputT',
    'OutputT',

    # .registry
    'ProcessorRegistry',
    'processor_registry',

    # .factory
    'ProcessorFactory',
    'load_yaml',

    # .dag
    'InputConfig',
    'OutputConfig',
    'ReshapeConfig',
    'PipeNode',
    'PipeContext',
    'FieldResolver',
    'DAGPipeline',
    'DAGExecutor',

    # .builders
    'ConfigBuilder',
]
|
glinker/core/base.py
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from typing import Generic, TypeVar, Any
|
|
3
|
+
from pydantic import BaseModel, Field
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
# Type variables used to parameterize the generic BaseComponent and
# BaseProcessor classes in this module. Each one is bounded by pydantic's
# BaseModel, so only BaseModel subclasses satisfy them.
ConfigT = TypeVar('ConfigT', bound=BaseModel)
InputT = TypeVar('InputT', bound=BaseModel)
OutputT = TypeVar('OutputT', bound=BaseModel)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class BaseConfig(BaseModel):
    """Root pydantic model for component configuration; declares no fields."""
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class BaseInput(BaseModel):
    """Root pydantic model for input data; declares no fields."""
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class BaseOutput(BaseModel):
    """Root pydantic model for output data; declares no fields."""
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class BaseComponent(ABC, Generic[ConfigT]):
    """
    Abstract foundation for components, which hold the core logic.

    A component stores its configuration and exposes discrete methods that
    can be chained together. Subclasses must report the names of those
    methods through ``get_available_methods``.
    """

    def __init__(self, config: ConfigT):
        # Store the config before running the hook so ``_setup`` can read it.
        self.config = config
        self._setup()

    def _setup(self):
        """Initialization hook run by ``__init__``; does nothing unless overridden."""

    @abstractmethod
    def get_available_methods(self) -> list[str]:
        """Return the names of the methods that may be used as pipeline steps."""
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class BaseProcessor(ABC, Generic[ConfigT, InputT, OutputT]):
    """
    Abstract processor that drives a component through a pipeline.

    A pipeline is a list of ``(method_name, kwargs)`` pairs. Each step looks
    up ``method_name`` on the component and calls it with the result of the
    previous step plus ``kwargs`` as keyword arguments.
    """

    def __init__(
        self,
        config: ConfigT,
        component: BaseComponent[ConfigT],
        pipeline: list[tuple[str, dict[str, Any]]] = None
    ):
        self.config = config
        self.component = component
        # Any falsy value (``None`` or an empty list) selects the default
        # pipeline supplied by the subclass.
        if pipeline:
            self.pipeline = pipeline
        else:
            self.pipeline = self._default_pipeline()

    @abstractmethod
    def _default_pipeline(self) -> list[tuple[str, dict[str, Any]]]:
        """Return the pipeline used when none is passed to ``__init__``."""

    def _validate_pipeline(self):
        """
        Check every step of ``self.pipeline`` against the component.

        Raises:
            ValueError: at the first step whose method name is not listed by
                the component's ``get_available_methods``.
        """
        available = self.component.get_available_methods()
        for method_name, _step_kwargs in self.pipeline:
            if method_name in available:
                continue
            raise ValueError(
                f"Method '{method_name}' not found in component. "
                f"Available: {available}"
            )

    def _execute_pipeline_step(
        self,
        data: Any,
        method_name: str,
        kwargs: dict[str, Any]
    ) -> Any:
        """Call the component method ``method_name`` with ``data`` and ``kwargs``."""
        return getattr(self.component, method_name)(data, **kwargs)

    def _execute_pipeline(
        self,
        data: Any,
        pipeline: list[tuple[str, dict[str, Any]]] = None
    ) -> Any:
        """
        Run ``data`` through every step and return the final result.

        A falsy ``pipeline`` (``None`` or an empty list) falls back to
        ``self.pipeline``.
        """
        steps = pipeline if pipeline else self.pipeline
        current = data
        # Thread the value through the steps: each output feeds the next step.
        for step_name, step_kwargs in steps:
            current = self._execute_pipeline_step(current, step_name, step_kwargs)
        return current

    @abstractmethod
    def __call__(self, input_data: InputT) -> OutputT:
        """Process ``input_data`` through the pipeline and return the output."""
|