glinker 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
glinker/__init__.py ADDED
@@ -0,0 +1,54 @@
1
"""
GLiNKER - Entity Linking Framework
A modular 4-layer entity linking pipeline using spaCy NER, database search, and GLiNER.
"""

# Package version string.
__version__ = "0.1.0"

# The layer processor modules are bound to underscore-prefixed aliases that are
# never referenced again in this file, so they are imported purely for their
# import-time side effects.
# NOTE(review): presumably each module registers its processors with
# `processor_registry` when imported -- confirm against glinker/l*/processor.py.
# NOTE(review): the module docstring says "4-layer" but five layer packages
# (l0-l4) are imported here -- confirm whether l0 counts as a layer.
from glinker.l0 import processor as _l0_processor
from glinker.l1 import processor as _l1_processor
from glinker.l2 import processor as _l2_processor
from glinker.l3 import processor as _l3_processor
from glinker.l4 import processor as _l4_processor

# Re-export the core building blocks so they are importable from `glinker`
# directly.
from glinker.core import (
    BaseConfig,
    BaseInput,
    BaseOutput,
    BaseComponent,
    BaseProcessor,
    ProcessorRegistry,
    processor_registry,
    ProcessorFactory,
    load_yaml,
    InputConfig,
    OutputConfig,
    ReshapeConfig,
    PipeNode,
    PipeContext,
    FieldResolver,
    DAGPipeline,
    DAGExecutor,
    ConfigBuilder,
)

# Names exported by `from glinker import *`. The list mirrors the
# `glinker.core` import above; `__version__` and the private layer aliases
# are not part of it.
__all__ = [
    'BaseConfig',
    'BaseInput',
    'BaseOutput',
    'BaseComponent',
    'BaseProcessor',
    'ProcessorRegistry',
    'processor_registry',
    'ProcessorFactory',
    'load_yaml',
    'InputConfig',
    'OutputConfig',
    'ReshapeConfig',
    'PipeNode',
    'PipeContext',
    'FieldResolver',
    'DAGPipeline',
    'DAGExecutor',
    'ConfigBuilder',
]
glinker/core/__init__.py ADDED
@@ -0,0 +1,56 @@
1
# Package initializer: gathers the public names from the sibling submodules
# (base, registry, factory, dag, builders) and re-exports them via __all__.

# Base models, abstract component/processor classes and their type variables.
from .base import (
    BaseConfig,
    BaseInput,
    BaseOutput,
    BaseComponent,
    BaseProcessor,
    ConfigT,
    InputT,
    OutputT
)
# Registry class and the `processor_registry` object exposed by .registry.
from .registry import (
    ProcessorRegistry,
    processor_registry
)
# Processor factory and the YAML loading helper.
from .factory import ProcessorFactory, load_yaml

# DAG pipeline configuration models and execution classes.
from .dag import (
    InputConfig,
    OutputConfig,
    ReshapeConfig,
    PipeNode,
    PipeContext,
    FieldResolver,
    DAGPipeline,
    DAGExecutor
)

# Configuration builder.
from .builders import ConfigBuilder

# Names exported by a star-import of this package, grouped by the submodule
# they come from (base, registry, factory, dag, builders).
__all__ = [
    'BaseConfig',
    'BaseInput',
    'BaseOutput',
    'BaseComponent',
    'BaseProcessor',
    'ConfigT',
    'InputT',
    'OutputT',

    'ProcessorRegistry',
    'processor_registry',

    'ProcessorFactory',
    'load_yaml',

    'InputConfig',
    'OutputConfig',
    'ReshapeConfig',
    'PipeNode',
    'PipeContext',
    'FieldResolver',
    'DAGPipeline',
    'DAGExecutor',

    'ConfigBuilder',
]
glinker/core/base.py ADDED
@@ -0,0 +1,103 @@
1
from abc import ABC, abstractmethod
from typing import Any, Generic, Optional, TypeVar

from pydantic import BaseModel, Field
4
+
5
+
6
# Type parameters used by the generic base classes in this module.
# Each is bounded by pydantic's BaseModel, so any pydantic model satisfies the
# bound -- not only subclasses of BaseConfig / BaseInput / BaseOutput.
ConfigT = TypeVar('ConfigT', bound=BaseModel)
InputT = TypeVar('InputT', bound=BaseModel)
OutputT = TypeVar('OutputT', bound=BaseModel)
9
+
10
+
11
class BaseConfig(BaseModel):
    """Base configuration for all components"""
    # Declares no fields of its own; the docstring alone forms the class body.
14
+
15
+
16
class BaseInput(BaseModel):
    """Base input model"""
    # Declares no fields of its own; the docstring alone forms the class body.
19
+
20
+
21
class BaseOutput(BaseModel):
    """Base output model"""
    # Declares no fields of its own; the docstring alone forms the class body.
24
+
25
+
26
class BaseComponent(ABC, Generic[ConfigT]):
    """
    Abstract home for a component's core logic, generic over its config type.

    Construction stores the config on the instance and then runs the
    `_setup` hook. Concrete components provide discrete methods that can be
    chained, and report their names through `get_available_methods`.
    """

    def __init__(self, config: ConfigT):
        """Keep `config` on the instance, then invoke `_setup()`."""
        self.config = config
        self._setup()

    def _setup(self):
        """Initialization hook called from `__init__`; does nothing here."""

    @abstractmethod
    def get_available_methods(self) -> list[str]:
        """Return the names of the methods usable as pipeline steps."""
44
+
45
+
46
class BaseProcessor(ABC, Generic[ConfigT, InputT, OutputT]):
    """
    Base processor that orchestrates component methods via pipeline.

    A pipeline is a list of ``(method_name, kwargs)`` tuples. Steps run in
    order: each one looks up ``method_name`` on the component and calls it
    with the previous step's result as the first positional argument,
    followed by ``kwargs`` as keyword arguments.
    """

    def __init__(
        self,
        config: ConfigT,
        component: BaseComponent[ConfigT],
        pipeline: Optional[list[tuple[str, dict[str, Any]]]] = None
    ):
        """
        Store the config, component and pipeline on the instance.

        Args:
            config: Configuration object kept as ``self.config``.
            component: Component whose methods the pipeline steps call.
            pipeline: Steps to run. ``None`` or an empty list selects the
                result of ``_default_pipeline()``.

        Note:
            ``_validate_pipeline()`` is not called here; it has to be
            invoked explicitly.
        """
        self.config = config
        self.component = component
        # Any falsy value (None or []) falls back to the default pipeline.
        self.pipeline = pipeline or self._default_pipeline()

    @abstractmethod
    def _default_pipeline(self) -> list[tuple[str, dict[str, Any]]]:
        """Define default pipeline for this processor"""

    def _validate_pipeline(self):
        """
        Validate that all pipeline methods exist in component.

        Raises:
            ValueError: On the first step whose method name is not listed by
                ``self.component.get_available_methods()``.
        """
        available = self.component.get_available_methods()
        for method_name, _ in self.pipeline:
            if method_name not in available:
                raise ValueError(
                    f"Method '{method_name}' not found in component. "
                    f"Available: {available}"
                )

    def _execute_pipeline_step(
        self,
        data: Any,
        method_name: str,
        kwargs: Optional[dict[str, Any]]
    ) -> Any:
        """
        Execute single pipeline step.

        Args:
            data: Value passed as the first positional argument.
            method_name: Attribute name looked up on ``self.component``.
            kwargs: Keyword arguments for the call; ``None`` means none.

        Returns:
            Whatever the component method returns.

        Raises:
            AttributeError: If the component has no attribute
                ``method_name``.
        """
        method = getattr(self.component, method_name)
        # `**` requires a mapping, so a step declared without arguments
        # (kwargs is None) is called with no extra keyword arguments.
        step_kwargs = {} if kwargs is None else kwargs
        return method(data, **step_kwargs)

    def _execute_pipeline(
        self,
        data: Any,
        pipeline: Optional[list[tuple[str, dict[str, Any]]]] = None
    ) -> Any:
        """
        Execute full pipeline on data.

        Args:
            data: Initial value fed to the first step.
            pipeline: Steps to run instead of ``self.pipeline``. ``None`` or
                an empty list selects ``self.pipeline``.

        Returns:
            The result of the last step, or ``data`` unchanged when the
            selected pipeline has no steps.
        """
        # Any falsy override (None or []) falls back to the stored pipeline.
        pipe = pipeline or self.pipeline
        result = data

        for method_name, kwargs in pipe:
            result = self._execute_pipeline_step(result, method_name, kwargs)

        return result

    @abstractmethod
    def __call__(self, input_data: InputT) -> OutputT:
        """Process input through pipeline"""