corp-extractor 0.9.3__py3-none-any.whl → 0.9.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,11 @@
1
1
  """
2
2
  Pydantic models for organization/entity database records.
3
+
4
+ v2 Schema Changes:
5
+ - Added SimplifiedLocationType enum for location categorization
6
+ - Added SourceTypeEnum for normalized source references
7
+ - Added RoleRecord and LocationRecord models for new tables
8
+ - Models support both TEXT-based v1 and FK-based v2 schemas
3
9
  """
4
10
 
5
11
  from enum import Enum
@@ -8,7 +14,36 @@ from typing import Any, Literal, Optional
8
14
  from pydantic import BaseModel, Field
9
15
 
10
16
 
11
- SourceType = Literal["gleif", "sec_edgar", "companies_house", "wikipedia"]
17
+ # Legacy source types for backward compatibility with v1 schema
18
+ SourceType = Literal["gleif", "sec_edgar", "companies_house", "wikipedia", "wikidata"]
19
+
20
+
21
class SourceTypeEnum(str, Enum):
    """
    Origin of a record in the v2 normalized schema.

    Each member's value is the source name. Members stand in for rows of the
    source_types lookup table; the ``id`` given in each comment is the row id
    that member is meant to correspond to.
    """

    # id=1: GLEIF LEI data
    GLEIF = "gleif"
    # id=2: SEC EDGAR filings
    SEC_EDGAR = "sec_edgar"
    # id=3: UK Companies House
    COMPANIES_HOUSE = "companies_house"
    # id=4: Wikidata/Wikipedia
    WIKIDATA = "wikidata"
31
+
32
+
33
class SimplifiedLocationType(str, Enum):
    """
    Coarse location categories used when filtering searches.

    Detailed Wikidata location types are collapsed onto these canonical
    buckets so that a query such as "find all cities" needs only one value.
    The ``id`` in each comment is the lookup-table row id that member is
    meant to correspond to.
    """

    # id=1: Continents (Q5107)
    CONTINENT = "continent"
    # id=2: Countries and sovereign states
    COUNTRY = "country"
    # id=3: States, provinces, regions, counties
    SUBDIVISION = "subdivision"
    # id=4: Cities, towns, municipalities, communes
    CITY = "city"
    # id=5: Districts, boroughs, neighborhoods
    DISTRICT = "district"
    # id=6: Former countries, historic territories
    HISTORIC = "historic"
    # id=7: Unclassified locations
    OTHER = "other"
12
47
 
13
48
 
14
49
  class EntityType(str, Enum):
@@ -105,9 +140,98 @@ class CompanyRecord(BaseModel):
105
140
  }
106
141
 
107
142
 
143
+ # Person sources (org sources minus "gleif" and "wikipedia")
108
144
  PersonSourceType = Literal["wikidata", "sec_edgar", "companies_house"]
109
145
 
110
146
 
147
+ # =============================================================================
148
+ # ROLE RECORD MODEL (v2)
149
+ # =============================================================================
150
+
151
+
152
class RoleRecord(BaseModel):
    """
    One role / job-title entry destined for the roles table.

    Roles are stored so that job titles can be normalized across sources and
    searched by role; equivalent titles (e.g., CEO, Chief Executive) can be
    grouped through canonicalization.
    """

    name: str = Field(..., description="Role/title name (e.g., 'Chief Executive Officer')")
    source: SourceType = Field(default="wikidata", description="Data source")
    source_id: Optional[str] = Field(default=None, description="Source identifier (e.g., Q484876 for CEO)")
    qid: Optional[int] = Field(default=None, description="Wikidata QID as integer (e.g., 484876)")
    record: dict[str, Any] = Field(default_factory=dict, description="Original record from source")

    @property
    def canonical_id(self) -> str:
        """Return ``source:source_id``, substituting the name when source_id is None or empty."""
        identifier = self.source_id if self.source_id else self.name
        return f"{self.source}:{identifier}"

    def model_dump_for_db(self) -> dict[str, Any]:
        """
        Build the plain dict handed to the storage layer.

        A missing ``source_id`` is written as an empty string; ``record`` is
        passed through as the dict itself (it is not serialized here).
        """
        row: dict[str, Any] = {"name": self.name, "source": self.source}
        row["source_id"] = self.source_id or ""
        row["qid"] = self.qid
        row["record"] = self.record
        return row
181
+
182
+
183
+ # =============================================================================
184
+ # LOCATION RECORD MODEL (v2)
185
+ # =============================================================================
186
+
187
+
188
class LocationRecord(BaseModel):
    """
    One place entry destined for the locations table.

    Covers geopolitical entities (countries, states, cities) together with
    their position in the hierarchy and their type classification.
    """

    name: str = Field(..., description="Location name (e.g., 'United States', 'California')")
    source: SourceType = Field(default="wikidata", description="Data source")
    source_id: Optional[str] = Field(default=None, description="Source identifier (e.g., 'US', 'Q30')")
    qid: Optional[int] = Field(default=None, description="Wikidata QID as integer (e.g., 30 for USA)")
    location_type: str = Field(default="country", description="Detailed location type (e.g., 'us_state', 'city')")
    simplified_type: SimplifiedLocationType = Field(
        default=SimplifiedLocationType.COUNTRY,
        description="Simplified type for filtering",
    )
    parent_ids: list[int] = Field(
        default_factory=list,
        description="Parent location IDs in hierarchy (e.g., [country_id, state_id])",
    )
    from_date: Optional[str] = Field(default=None, description="Start date (ISO format YYYY-MM-DD)")
    to_date: Optional[str] = Field(default=None, description="End date (ISO format YYYY-MM-DD)")
    record: dict[str, Any] = Field(default_factory=dict, description="Original record from source")

    @property
    def canonical_id(self) -> str:
        """Return ``source:source_id``, substituting the name when source_id is None or empty."""
        identifier = self.source_id if self.source_id else self.name
        return f"{self.source}:{identifier}"

    def model_dump_for_db(self) -> dict[str, Any]:
        """
        Build the plain dict handed to the storage layer.

        ``parent_ids`` is encoded as a JSON array string; a missing
        ``source_id``, ``from_date`` or ``to_date`` becomes an empty string;
        ``record`` is passed through as the dict itself. ``simplified_type``
        is not part of the returned dict.
        """
        import json

        encoded_parents = json.dumps(self.parent_ids)
        return dict(
            name=self.name,
            source=self.source,
            source_id=self.source_id or "",
            qid=self.qid,
            location_type=self.location_type,
            parent_ids=encoded_parents,
            from_date=self.from_date or "",
            to_date=self.to_date or "",
            record=self.record,
        )
233
+
234
+
111
235
  class PersonRecord(BaseModel):
112
236
  """
113
237
  A person record for the embedding database.
@@ -0,0 +1,409 @@
1
+ """
2
+ Database schema v2 with normalized foreign key references.
3
+
4
+ This module contains DDL statements for the normalized entity database schema
5
+ that replaces TEXT-based enum storage with INTEGER FK references to lookup tables.
6
+
7
+ Changes from v1:
8
+ - Enum tables: source_types, people_types, organization_types, location_types
9
+ - New tables: roles, locations, simplified_location_types
10
+ - organizations (v2 layout): source_id FK, entity_type_id FK, region_id FK (to locations)
11
+ - people (v2 layout): source_id FK, person_type_id FK, country_id FK, known_for_role_id FK
12
+ - qid_labels: qid stored as INTEGER (Q prefix stripped)
13
+ - Human-readable views with JOINs
14
+ """
15
+
16
+ # =============================================================================
17
+ # ENUM LOOKUP TABLES
18
+ # =============================================================================
19
+
20
+ CREATE_SOURCE_TYPES = """
21
+ CREATE TABLE IF NOT EXISTS source_types (
22
+ id INTEGER PRIMARY KEY,
23
+ name TEXT NOT NULL UNIQUE
24
+ );
25
+ """
26
+
27
+ CREATE_PEOPLE_TYPES = """
28
+ CREATE TABLE IF NOT EXISTS people_types (
29
+ id INTEGER PRIMARY KEY,
30
+ name TEXT NOT NULL UNIQUE
31
+ );
32
+ """
33
+
34
+ CREATE_ORGANIZATION_TYPES = """
35
+ CREATE TABLE IF NOT EXISTS organization_types (
36
+ id INTEGER PRIMARY KEY,
37
+ name TEXT NOT NULL UNIQUE
38
+ );
39
+ """
40
+
41
+ CREATE_SIMPLIFIED_LOCATION_TYPES = """
42
+ CREATE TABLE IF NOT EXISTS simplified_location_types (
43
+ id INTEGER PRIMARY KEY,
44
+ name TEXT NOT NULL UNIQUE
45
+ );
46
+ """
47
+
48
+ CREATE_LOCATION_TYPES = """
49
+ CREATE TABLE IF NOT EXISTS location_types (
50
+ id INTEGER PRIMARY KEY,
51
+ name TEXT NOT NULL UNIQUE,
52
+ qid INTEGER,
53
+ simplified_id INTEGER NOT NULL,
54
+ FOREIGN KEY (simplified_id) REFERENCES simplified_location_types(id)
55
+ );
56
+ """
57
+
58
+ # =============================================================================
59
+ # ROLES TABLE
60
+ # =============================================================================
61
+
62
+ CREATE_ROLES = """
63
+ CREATE TABLE IF NOT EXISTS roles (
64
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
65
+ qid INTEGER,
66
+ name TEXT NOT NULL,
67
+ name_normalized TEXT NOT NULL,
68
+ source_id INTEGER NOT NULL,
69
+ source_identifier TEXT,
70
+ record TEXT NOT NULL DEFAULT '{}',
71
+ canon_id INTEGER DEFAULT NULL,
72
+ canon_size INTEGER DEFAULT 1,
73
+ FOREIGN KEY (source_id) REFERENCES source_types(id),
74
+ UNIQUE(name_normalized, source_id)
75
+ );
76
+ """
77
+
78
+ CREATE_ROLES_INDEXES = """
79
+ CREATE INDEX IF NOT EXISTS idx_roles_name ON roles(name);
80
+ CREATE INDEX IF NOT EXISTS idx_roles_name_normalized ON roles(name_normalized);
81
+ CREATE INDEX IF NOT EXISTS idx_roles_qid ON roles(qid);
82
+ CREATE INDEX IF NOT EXISTS idx_roles_source_id ON roles(source_id);
83
+ CREATE INDEX IF NOT EXISTS idx_roles_canon_id ON roles(canon_id);
84
+ """
85
+
86
+ # =============================================================================
87
+ # LOCATIONS TABLE
88
+ # =============================================================================
89
+
90
# DDL for the locations table: one row per place, keyed by an autoincrement id.
# source_id is an integer FK into source_types; the source's own identifier
# string lives in source_identifier.
# parent_ids is a TEXT column; LocationRecord.model_dump_for_db produces a
# JSON-encoded list for its "parent_ids" key — presumably that is what is
# stored here (confirm against the insert code).
# NOTE(review): source_identifier is nullable yet part of
# UNIQUE(source_identifier, source_id). SQLite treats NULLs as distinct in
# UNIQUE constraints, so rows with a NULL source_identifier are not
# de-duplicated — confirm whether inserts always supply a value.
+ CREATE_LOCATIONS = """
91
+ CREATE TABLE IF NOT EXISTS locations (
92
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
93
+ qid INTEGER,
94
+ name TEXT NOT NULL,
95
+ name_normalized TEXT NOT NULL,
96
+ source_id INTEGER NOT NULL,
97
+ source_identifier TEXT,
98
+ parent_ids TEXT,
99
+ location_type_id INTEGER NOT NULL,
100
+ record TEXT NOT NULL DEFAULT '{}',
101
+ from_date TEXT DEFAULT NULL,
102
+ to_date TEXT DEFAULT NULL,
103
+ canon_id INTEGER DEFAULT NULL,
104
+ canon_size INTEGER DEFAULT 1,
105
+ FOREIGN KEY (source_id) REFERENCES source_types(id),
106
+ FOREIGN KEY (location_type_id) REFERENCES location_types(id),
107
+ UNIQUE(source_identifier, source_id)
108
+ );
109
+ """
110
+
111
+ CREATE_LOCATIONS_INDEXES = """
112
+ CREATE INDEX IF NOT EXISTS idx_locations_name ON locations(name);
113
+ CREATE INDEX IF NOT EXISTS idx_locations_name_normalized ON locations(name_normalized);
114
+ CREATE INDEX IF NOT EXISTS idx_locations_qid ON locations(qid);
115
+ CREATE INDEX IF NOT EXISTS idx_locations_source_id ON locations(source_id);
116
+ CREATE INDEX IF NOT EXISTS idx_locations_location_type_id ON locations(location_type_id);
117
+ CREATE INDEX IF NOT EXISTS idx_locations_canon_id ON locations(canon_id);
118
+ """
119
+
120
+ # =============================================================================
121
+ # ORGANIZATIONS V2 TABLE
122
+ # =============================================================================
123
+
124
+ CREATE_ORGANIZATIONS_V2 = """
125
+ CREATE TABLE IF NOT EXISTS organizations (
126
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
127
+ qid INTEGER,
128
+ name TEXT NOT NULL,
129
+ name_normalized TEXT NOT NULL,
130
+ source_id INTEGER NOT NULL,
131
+ source_identifier TEXT NOT NULL,
132
+ region_id INTEGER,
133
+ entity_type_id INTEGER NOT NULL DEFAULT 17,
134
+ from_date TEXT DEFAULT NULL,
135
+ to_date TEXT DEFAULT NULL,
136
+ record TEXT NOT NULL DEFAULT '{}',
137
+ canon_id INTEGER DEFAULT NULL,
138
+ canon_size INTEGER DEFAULT 1,
139
+ FOREIGN KEY (source_id) REFERENCES source_types(id),
140
+ FOREIGN KEY (region_id) REFERENCES locations(id),
141
+ FOREIGN KEY (entity_type_id) REFERENCES organization_types(id),
142
+ UNIQUE(source_identifier, source_id)
143
+ );
144
+ """
145
+
146
# Index script for the organizations table: several ';'-separated statements
# in one string (create_all_tables splits on ';' before executing).
# NOTE(review): idx_orgs_name_region_source is a UNIQUE index over
# (name, region_id, source_id). region_id is nullable in the table DDL, and
# SQLite treats NULLs as distinct in unique indexes, so rows without a region
# are not constrained by it. Conversely, two same-named organizations in one
# region from one source cannot both be stored — confirm this is intended.
# NOTE(review): idx_orgs_source_identifier likely overlaps with the index
# SQLite creates automatically for the table's
# UNIQUE(source_identifier, source_id) constraint — verify before relying on it.
+ CREATE_ORGANIZATIONS_V2_INDEXES = """
147
+ CREATE INDEX IF NOT EXISTS idx_orgs_name ON organizations(name);
148
+ CREATE INDEX IF NOT EXISTS idx_orgs_name_normalized ON organizations(name_normalized);
149
+ CREATE INDEX IF NOT EXISTS idx_orgs_qid ON organizations(qid);
150
+ CREATE INDEX IF NOT EXISTS idx_orgs_source_id ON organizations(source_id);
151
+ CREATE INDEX IF NOT EXISTS idx_orgs_source_identifier ON organizations(source_identifier);
152
+ CREATE INDEX IF NOT EXISTS idx_orgs_region_id ON organizations(region_id);
153
+ CREATE INDEX IF NOT EXISTS idx_orgs_entity_type_id ON organizations(entity_type_id);
154
+ CREATE INDEX IF NOT EXISTS idx_orgs_canon_id ON organizations(canon_id);
155
+ CREATE UNIQUE INDEX IF NOT EXISTS idx_orgs_name_region_source ON organizations(name, region_id, source_id);
156
+ """
157
+
158
+ # =============================================================================
159
+ # PEOPLE V2 TABLE
160
+ # =============================================================================
161
+
162
# DDL for the people table. Source, country, person type, role and
# organization are integer FKs into source_types, locations, people_types,
# roles and organizations; known_for_org additionally keeps the organization
# as free text.
# person_type_id defaults to 15 — presumably the id of a fallback row in
# people_types; confirm against the lookup-table seed data.
# NOTE(review): the UNIQUE constraint includes known_for_role_id and
# known_for_org_id, both nullable. SQLite treats NULLs as distinct in UNIQUE
# constraints, so the same (source_identifier, source_id) can be inserted
# repeatedly whenever either of those columns is NULL — confirm that inserts
# de-duplicate such rows some other way.
+ CREATE_PEOPLE_V2 = """
163
+ CREATE TABLE IF NOT EXISTS people (
164
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
165
+ qid INTEGER,
166
+ name TEXT NOT NULL,
167
+ name_normalized TEXT NOT NULL,
168
+ source_id INTEGER NOT NULL,
169
+ source_identifier TEXT NOT NULL,
170
+ country_id INTEGER,
171
+ person_type_id INTEGER NOT NULL DEFAULT 15,
172
+ known_for_role_id INTEGER,
173
+ known_for_org TEXT NOT NULL DEFAULT '',
174
+ known_for_org_id INTEGER,
175
+ from_date TEXT DEFAULT NULL,
176
+ to_date TEXT DEFAULT NULL,
177
+ birth_date TEXT DEFAULT NULL,
178
+ death_date TEXT DEFAULT NULL,
179
+ record TEXT NOT NULL DEFAULT '{}',
180
+ canon_id INTEGER DEFAULT NULL,
181
+ canon_size INTEGER DEFAULT 1,
182
+ FOREIGN KEY (source_id) REFERENCES source_types(id),
183
+ FOREIGN KEY (country_id) REFERENCES locations(id),
184
+ FOREIGN KEY (person_type_id) REFERENCES people_types(id),
185
+ FOREIGN KEY (known_for_role_id) REFERENCES roles(id),
186
+ FOREIGN KEY (known_for_org_id) REFERENCES organizations(id),
187
+ UNIQUE(source_identifier, source_id, known_for_role_id, known_for_org_id)
188
+ );
189
+ """
190
+
191
+ CREATE_PEOPLE_V2_INDEXES = """
192
+ CREATE INDEX IF NOT EXISTS idx_people_name ON people(name);
193
+ CREATE INDEX IF NOT EXISTS idx_people_name_normalized ON people(name_normalized);
194
+ CREATE INDEX IF NOT EXISTS idx_people_qid ON people(qid);
195
+ CREATE INDEX IF NOT EXISTS idx_people_source_id ON people(source_id);
196
+ CREATE INDEX IF NOT EXISTS idx_people_source_identifier ON people(source_identifier);
197
+ CREATE INDEX IF NOT EXISTS idx_people_country_id ON people(country_id);
198
+ CREATE INDEX IF NOT EXISTS idx_people_person_type_id ON people(person_type_id);
199
+ CREATE INDEX IF NOT EXISTS idx_people_known_for_role_id ON people(known_for_role_id);
200
+ CREATE INDEX IF NOT EXISTS idx_people_known_for_org_id ON people(known_for_org_id);
201
+ CREATE INDEX IF NOT EXISTS idx_people_canon_id ON people(canon_id);
202
+ """
203
+
204
+ # =============================================================================
205
+ # QID LABELS TABLE (V2 - INTEGER QID)
206
+ # =============================================================================
207
+
208
+ CREATE_QID_LABELS_V2 = """
209
+ CREATE TABLE IF NOT EXISTS qid_labels (
210
+ qid INTEGER PRIMARY KEY,
211
+ label TEXT NOT NULL
212
+ );
213
+ """
214
+
215
+ # =============================================================================
216
+ # EMBEDDING VIRTUAL TABLES
217
+ # =============================================================================
218
+
219
def _vec0_embeddings_ddl(table: str, key_column: str, element_type: str, embedding_dim: int) -> str:
    """
    Build the CREATE VIRTUAL TABLE statement for one vec0 embeddings table.

    Args:
        table: Name of the virtual table to create.
        key_column: Name of the INTEGER PRIMARY KEY column.
        element_type: Vector element type placed before the dimension
            (``float`` or ``int8``).
        embedding_dim: Number of vector components; must be a positive integer.

    Raises:
        TypeError: If ``embedding_dim`` is not an integer.
        ValueError: If ``embedding_dim`` is zero or negative.
    """
    import operator

    # The dimension is spliced into the DDL text, so only a genuine positive
    # integer may reach the f-string; anything else would yield broken or
    # attacker-shaped SQL.
    try:
        dim = operator.index(embedding_dim)
    except TypeError as exc:
        raise TypeError(f"embedding_dim must be an integer, got {embedding_dim!r}") from exc
    if dim <= 0:
        raise ValueError(f"embedding_dim must be positive, got {dim}")

    return f"""
    CREATE VIRTUAL TABLE IF NOT EXISTS {table} USING vec0(
        {key_column} INTEGER PRIMARY KEY,
        embedding {element_type}[{dim}]
    );
    """


def get_create_organization_embeddings(embedding_dim: int = 768) -> str:
    """Get DDL for organization embeddings virtual table (float vectors keyed by org_id)."""
    return _vec0_embeddings_ddl("organization_embeddings", "org_id", "float", embedding_dim)


def get_create_person_embeddings(embedding_dim: int = 768) -> str:
    """Get DDL for person embeddings virtual table (float vectors keyed by person_id)."""
    return _vec0_embeddings_ddl("person_embeddings", "person_id", "float", embedding_dim)


def get_create_organization_embeddings_scalar(embedding_dim: int = 768) -> str:
    """Get DDL for organization scalar (int8) embeddings virtual table."""
    return _vec0_embeddings_ddl("organization_embeddings_scalar", "org_id", "int8", embedding_dim)


def get_create_person_embeddings_scalar(embedding_dim: int = 768) -> str:
    """Get DDL for person scalar (int8) embeddings virtual table."""
    return _vec0_embeddings_ddl("person_embeddings_scalar", "person_id", "int8", embedding_dim)
257
+
258
+ # =============================================================================
259
+ # HUMAN-READABLE VIEWS
260
+ # =============================================================================
261
+
262
+ CREATE_ORGANIZATIONS_VIEW = """
263
+ CREATE VIEW IF NOT EXISTS organizations_view AS
264
+ SELECT
265
+ o.id,
266
+ o.qid,
267
+ o.name,
268
+ o.name_normalized,
269
+ s.name as source,
270
+ o.source_identifier,
271
+ l.name as region,
272
+ slt.name as region_type,
273
+ ot.name as entity_type,
274
+ o.from_date,
275
+ o.to_date,
276
+ o.canon_id,
277
+ o.canon_size
278
+ FROM organizations o
279
+ JOIN source_types s ON o.source_id = s.id
280
+ LEFT JOIN locations l ON o.region_id = l.id
281
+ LEFT JOIN location_types lt ON l.location_type_id = lt.id
282
+ LEFT JOIN simplified_location_types slt ON lt.simplified_id = slt.id
283
+ JOIN organization_types ot ON o.entity_type_id = ot.id;
284
+ """
285
+
286
+ CREATE_PEOPLE_VIEW = """
287
+ CREATE VIEW IF NOT EXISTS people_view AS
288
+ SELECT
289
+ p.id,
290
+ p.qid,
291
+ p.name,
292
+ p.name_normalized,
293
+ s.name as source,
294
+ p.source_identifier,
295
+ l.name as country,
296
+ pt.name as person_type,
297
+ r.name as known_for_role,
298
+ p.known_for_org,
299
+ p.known_for_org_id,
300
+ p.from_date,
301
+ p.to_date,
302
+ p.birth_date,
303
+ p.death_date,
304
+ p.canon_id,
305
+ p.canon_size
306
+ FROM people p
307
+ JOIN source_types s ON p.source_id = s.id
308
+ LEFT JOIN locations l ON p.country_id = l.id
309
+ JOIN people_types pt ON p.person_type_id = pt.id
310
+ LEFT JOIN roles r ON p.known_for_role_id = r.id;
311
+ """
312
+
313
+ CREATE_ROLES_VIEW = """
314
+ CREATE VIEW IF NOT EXISTS roles_view AS
315
+ SELECT
316
+ r.id,
317
+ r.qid,
318
+ r.name,
319
+ r.name_normalized,
320
+ s.name as source,
321
+ r.source_identifier,
322
+ r.canon_id,
323
+ r.canon_size
324
+ FROM roles r
325
+ JOIN source_types s ON r.source_id = s.id;
326
+ """
327
+
328
+ CREATE_LOCATIONS_VIEW = """
329
+ CREATE VIEW IF NOT EXISTS locations_view AS
330
+ SELECT
331
+ l.id,
332
+ l.qid,
333
+ l.name,
334
+ l.name_normalized,
335
+ s.name as source,
336
+ l.source_identifier,
337
+ l.parent_ids,
338
+ lt.name as location_type,
339
+ slt.name as simplified_type,
340
+ l.from_date,
341
+ l.to_date,
342
+ l.canon_id,
343
+ l.canon_size
344
+ FROM locations l
345
+ JOIN source_types s ON l.source_id = s.id
346
+ JOIN location_types lt ON l.location_type_id = lt.id
347
+ JOIN simplified_location_types slt ON lt.simplified_id = slt.id;
348
+ """
349
+
350
+ # =============================================================================
351
+ # ALL DDL STATEMENTS IN ORDER
352
+ # =============================================================================
353
+
354
+ ALL_DDL_STATEMENTS = [
355
+ # Enum tables first (no dependencies)
356
+ CREATE_SOURCE_TYPES,
357
+ CREATE_PEOPLE_TYPES,
358
+ CREATE_ORGANIZATION_TYPES,
359
+ CREATE_SIMPLIFIED_LOCATION_TYPES,
360
+ CREATE_LOCATION_TYPES,
361
+ # New entity tables
362
+ CREATE_ROLES,
363
+ CREATE_ROLES_INDEXES,
364
+ CREATE_LOCATIONS,
365
+ CREATE_LOCATIONS_INDEXES,
366
+ # Main entity tables
367
+ CREATE_ORGANIZATIONS_V2,
368
+ CREATE_ORGANIZATIONS_V2_INDEXES,
369
+ CREATE_PEOPLE_V2,
370
+ CREATE_PEOPLE_V2_INDEXES,
371
+ # Reference tables
372
+ CREATE_QID_LABELS_V2,
373
+ ]
374
+
375
+ VIEW_DDL_STATEMENTS = [
376
+ CREATE_ORGANIZATIONS_VIEW,
377
+ CREATE_PEOPLE_VIEW,
378
+ CREATE_ROLES_VIEW,
379
+ CREATE_LOCATIONS_VIEW,
380
+ ]
381
+
382
+
383
def create_all_tables(conn, embedding_dim: int = 768) -> None:
    """
    Create every v2 schema object on the given connection, then commit.

    Order of creation: the tables and indexes from ALL_DDL_STATEMENTS, the
    four vec0 embedding virtual tables, and finally the views from
    VIEW_DDL_STATEMENTS.

    Args:
        conn: SQLite connection
        embedding_dim: Dimension for embedding vectors
    """
    # An entry of ALL_DDL_STATEMENTS may bundle several statements; break each
    # one apart on ';' and run the non-empty pieces one at a time.
    table_statements = (
        piece.strip()
        for script in ALL_DDL_STATEMENTS
        for piece in script.strip().split(";")
    )
    for sql in table_statements:
        if not sql:
            continue
        conn.execute(sql)

    # float32 embedding tables first, then the scalar (int8) variants, which
    # are kept for 75% storage reduction.
    embedding_builders = (
        get_create_organization_embeddings,
        get_create_person_embeddings,
        get_create_organization_embeddings_scalar,
        get_create_person_embeddings_scalar,
    )
    for build_ddl in embedding_builders:
        conn.execute(build_ddl(embedding_dim))

    # Each view constant is executed whole, without splitting.
    for view_sql in VIEW_DDL_STATEMENTS:
        conn.execute(view_sql)

    conn.commit()