elaws-parser 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,739 @@
1
+ """
2
+ YAMLパーサを利用する場合のlanggraphコード
3
+
4
+ # TODO :: law_extraction.pyとかぶっているGraphBuilderなどのリファクタリング
5
+ # TODO :: YAMLArticleExtractorとRegulationExtractorも共通部分が多いのでまとめられないか検討
6
+ # TODO :: YamlArticleExtractorはyaml_converterと密接に繋がっているので,場所を移動する.
7
+ """
8
+
9
+ import logging
10
+ from abc import ABC, abstractmethod
11
+ from dataclasses import dataclass, field
12
+ from enum import Enum
13
+ from pathlib import Path
14
+ from typing import Any, Dict, List, Literal, Optional, TypedDict
15
+
16
+ import yaml
17
+ from langchain_core.language_models import BaseLLM
18
+ from langchain_core.messages import BaseMessage, SystemMessage
19
+ from langchain_core.prompts import PromptTemplate
20
+ from langchain_openai import ChatOpenAI
21
+ from langgraph.graph import END, StateGraph # CompiledGraph
22
+ from pydantic import BaseModel, Field
23
+
24
+ from .law_extraction import ( # RegulationExtractor,
25
+ BaseExtractor,
26
+ ExtractionResult,
27
+ GraphState,
28
+ LegalDocument,
29
+ ProcessingStage,
30
+ PromptManager,
31
+ ViewpointGenerator,
32
+ flatten_state,
33
+ )
34
+
35
+ logger = logging.getLogger(__name__)
36
+
37
+
38
class RelevantArticles(BaseModel):
    """関連条文のstructured output用Pydanticモデル"""

    # Schema handed to ``llm.with_structured_output`` by the extractors in this
    # module.
    # NOTE(review): pydantic publishes the class docstring and the ``Field``
    # descriptions in the generated schema, so they presumably reach the LLM as
    # instructions. They are therefore kept verbatim rather than translated.

    # Article identifiers as strings, because branch articles such as '20_2'
    # cannot be represented as integers.
    article_numbers: List[str] = Field(
        description="関連する条文番号のリスト(例: ['3', '4', '20_2', '62'])",
    )

    # Short free-text justification for the selection above.
    extraction_reasoning: str = Field(
        description="これらの条文を選択した理由の簡潔な説明",
    )
47
+
48
+
49
@dataclass
class ExtractedArticleContent:
    """Result of looking up one article in a law's YAML structure.

    Attributes:
        article_num: Article number that was requested.
        title: Article title; empty when the lookup failed.
        full_content: Rendered article text, or a "could not be retrieved"
            message when the lookup failed.
        found: ``False`` when the article could not be extracted.
    """

    article_num: str
    title: str
    full_content: str
    # Defaults to True so that only the failure path has to set it.
    found: bool = True
57
+
58
+
59
class YamlArticleExtractor:
    """Look up articles in a law's YAML structure and render them as text.

    Three top-level layouts are recognised and checked in this order; only
    the first one present is searched:

    * ``parts`` -> ``chapters`` -> (``articles`` | ``sections`` -> ...)
    * ``chapters`` -> (``articles`` | ``sections`` -> ...)
    * ``articles`` (flat list)
    """

    def __init__(self, yaml_data: Dict[str, Any]):
        """
        Args:
            yaml_data: Parsed YAML dictionary describing one law.
        """
        self.yaml_data = yaml_data

    def extract_articles_by_numbers(
        self, article_numbers: List[str]
    ) -> List["ExtractedArticleContent"]:
        """Extract the articles with the given numbers.

        Args:
            article_numbers: Article numbers to extract.

        Returns:
            One entry per requested number, in request order. A number that
            cannot be extracted yields a placeholder with ``found=False``
            instead of raising.
        """
        extracted_articles = []

        for article_num in article_numbers:
            try:
                extracted_articles.append(self._find_and_extract_article(article_num))
            except Exception as e:
                # Deliberately best-effort: one bad article must not abort the
                # whole batch, so the failure is recorded in the result.
                logger.error(f"条文{article_num}の抽出でエラー: {e}")
                extracted_articles.append(
                    ExtractedArticleContent(
                        article_num=article_num,
                        title="",
                        full_content=f"第{article_num}条の内容を取得できませんでした",
                        found=False,
                    )
                )

        return extracted_articles

    def _find_and_extract_article(self, article_num: str) -> "ExtractedArticleContent":
        """Find one article and render its full text.

        Args:
            article_num: Article number to look up.

        Returns:
            The extracted article with ``found=True``.

        Raises:
            ValueError: If no article with that number exists.
        """
        target_article = self._search_article_in_yaml(article_num)

        if not target_article:
            raise ValueError(f"第{article_num}条が見つかりません")

        return ExtractedArticleContent(
            article_num=article_num,
            title=target_article.get("title", ""),
            full_content=self._extract_full_article_text(target_article),
            found=True,
        )

    def _search_article_in_yaml(self, article_num: str) -> Optional[Dict[str, Any]]:
        """Return the article dict whose ``article_num`` matches, or ``None``.

        Numbers are compared by their string form, so an integer ``3`` stored
        in the YAML matches the string ``'3'``.

        Args:
            article_num: Article number to look up.

        Returns:
            The matching article dictionary, or ``None`` when it is absent.
        """
        data = self.yaml_data
        wanted = str(article_num)

        # "parts" layout (some large statutes).
        if "parts" in data:
            for part in data["parts"] or ():
                hit = self._search_chapters(part.get("chapters") or (), wanted)
                if hit is not None:
                    return hit
            return None

        # "chapters" layout (typical statutes).
        if "chapters" in data:
            return self._search_chapters(data["chapters"] or (), wanted)

        # Flat "articles" layout (enforcement regulations and similar).
        if "articles" in data:
            return self._first_match(data["articles"] or (), wanted)

        # Only reached when none of the known layouts is present. A missing
        # article inside a known layout is reported by the caller instead.
        logger.warning("yaml構造にparts,chapters,articlesが存在しません.")
        return None

    def _search_chapters(self, chapters: Any, wanted: str) -> Optional[Dict[str, Any]]:
        """Search chapters, then their sections and subsections, for ``wanted``."""
        for chapter in chapters:
            # Articles directly under the chapter.
            hit = self._first_match(chapter.get("articles") or (), wanted)
            if hit is not None:
                return hit

            for section in chapter.get("sections") or ():
                # Subsections are searched before the section's own articles.
                for subsection in section.get("subsections") or ():
                    hit = self._first_match(subsection.get("articles") or (), wanted)
                    if hit is not None:
                        return hit

                hit = self._first_match(section.get("articles") or (), wanted)
                if hit is not None:
                    return hit

        return None

    @staticmethod
    def _first_match(articles: Any, wanted: str) -> Optional[Dict[str, Any]]:
        """Return the first article in ``articles`` numbered ``wanted``."""
        for article in articles:
            stored = article.get("article_num")
            # Skip unnumbered articles so None never matches the text "None".
            if stored is not None and str(stored) == wanted:
                return article
        return None

    def _extract_full_article_text(self, article: Dict[str, Any]) -> str:
        """Render an article as a header line followed by its paragraphs.

        Args:
            article: Article dictionary from the YAML structure.

        Returns:
            Newline-joined text; the first line is the article header.
        """
        title = article.get("title", "")
        caption = article.get("caption", "")
        article_num = article.get("article_num", "?")

        if caption:
            header = f"第{article_num}条({caption})"
        else:
            header = f"第{article_num}条"
        if title:
            header += f" {title}"

        content_parts = [header]

        # Paragraph positions are 1-based and used as a fallback number.
        for position, paragraph in enumerate(article.get("paragraphs") or (), start=1):
            paragraph_content = self._extract_paragraph_text(paragraph, position)
            if paragraph_content:
                content_parts.append(paragraph_content)

        return "\n".join(content_parts)

    def _extract_paragraph_text(
        self, paragraph: Dict[str, Any], paragraph_index: int
    ) -> str:
        """Render one paragraph: number marker, body, items and table.

        Args:
            paragraph: Paragraph dictionary.
            paragraph_index: 1-based position, used when the paragraph has no
                explicit ``paragraph_num``.

        Returns:
            Newline-joined text, or ``""`` when there is nothing to render.
        """
        parts = []

        paragraph_num = paragraph.get("paragraph_num", str(paragraph_index))
        # The marker is omitted for the first paragraph. Compare as text so an
        # integer 1 from the YAML is treated the same as "1".
        if paragraph_num and str(paragraph_num) != "1":
            parts.append(f"(第{paragraph_num}項)")

        content = paragraph.get("content", "")
        if content:
            parts.append(content)

        for item in paragraph.get("items") or ():
            item_text = self._extract_item_text(item)
            if item_text:
                parts.append(f" {item_text}")

        table = paragraph.get("table")
        if table:
            table_text = self._extract_table_text(table)
            if table_text:
                parts.append(f"【表】\n{table_text}")

        return "\n".join(parts)

    def _extract_item_text(self, item: Dict[str, Any]) -> str:
        """Render one item together with its subitems.

        Items and subitems share the same shape (``title``, ``content``,
        ``subitems``), so the recursive subitem renderer is reused.
        """
        return self._extract_subitem_text(item)

    def _extract_subitem_text(self, subitem: Dict[str, Any]) -> str:
        """Render a subitem and, recursively, its nested subitems.

        A node with a title but no content contributes no line of its own;
        its nested subitems are still rendered.
        """
        parts = []

        title = subitem.get("title", "")
        content = subitem.get("content", "")
        if title and content:
            parts.append(f"{title} {content}")
        elif content:
            parts.append(content)

        for nested_subitem in subitem.get("subitems") or ():
            nested_text = self._extract_subitem_text(nested_subitem)
            if nested_text:
                parts.append(f" {nested_text}")

        return "\n".join(parts)

    def _extract_table_text(self, table: Dict[str, Any]) -> str:
        """Render a table as one ``| a | b |`` line per row.

        Rows that are not lists are skipped.
        """
        rows = table.get("rows") or ()
        return "\n".join(
            "| " + " | ".join(str(cell) for cell in row) + " |"
            for row in rows
            if isinstance(row, list)
        )
320
+
321
+
322
class LawExtractor(BaseExtractor):
    """Extract the relevant articles from the law itself (YAML-backed).

    Works in two steps: the LLM names the relevant article numbers, then the
    text of those articles is read from the law's YAML structure.

    ``self.llm``, ``self.prompt_manager``, ``self.prompt_name`` and
    ``self._create_messages`` are not defined here; presumably they are
    provided by ``BaseExtractor`` (confirm in ``law_extraction``).
    """

    def extract(self, state: GraphState) -> ExtractionResult:
        """Run the two-step extraction against ``state["law_document"]``.

        Args:
            state: Graph state; ``law_document`` and ``target_articles`` are
                read here.

        Returns:
            The formatted article text plus metadata that records the stage,
            source document, target articles, identified article numbers and
            the LLM's reasoning.
        """
        logger.info("法令からの関連条文抽出を開始")

        # Step 1: let the LLM name the relevant article numbers.
        relevant_articles = self._identify_relevant_articles(state)

        # Step 2: pull exactly those articles out of the YAML structure.
        extracted_content = self._extract_articles_from_yaml(
            state["law_document"], relevant_articles.article_numbers
        )

        return ExtractionResult(
            content=extracted_content,
            metadata={
                "stage": "law_extraction",
                "source_document": state["law_document"].name,
                "target_articles": state["target_articles"],
                "identified_articles": relevant_articles.article_numbers,
                "extraction_reasoning": relevant_articles.extraction_reasoning,
            },
        )

    def _identify_relevant_articles(self, state: GraphState) -> RelevantArticles:
        """Ask the LLM, in structured-output mode, for relevant article numbers.

        Args:
            state: Graph state used to build the prompt context.

        Returns:
            The structured LLM response.
        """
        logger.info("LLMによる関連条文番号の特定を開始")

        # Keys in special_context override same-named keys from the flattened
        # state because they are merged last.
        base_context = flatten_state(state)
        special_context = {
            "law_name": state["law_document"].name,
            "law_article": ", ".join(state["target_articles"]),
            "law_text": state["law_document"].content,
        }

        formatted_prompt = self.prompt_manager.render_prompt(
            self.prompt_name,
            context={**base_context, **special_context},
        )
        messages = self._create_messages(formatted_prompt)

        structured_llm = self.llm.with_structured_output(RelevantArticles)
        response: RelevantArticles = structured_llm.invoke(messages)

        logger.info(f"特定された関連条文: {response.article_numbers}")
        return response

    def _extract_articles_from_yaml(
        self, law_document: "LegalDocument", article_numbers: List[str]
    ) -> str:
        """Read the given articles from the document's YAML data.

        Args:
            law_document: Document expected to carry a ``yaml_data`` attribute.
            article_numbers: Article numbers as strings (e.g. ``'20_2'``).

        Returns:
            The formatted article text, or an error message string when the
            document has no YAML data.
        """
        logger.info(f"YAML構造から{len(article_numbers)}件の条文を抽出中")

        # A missing attribute and an explicit None are handled the same way.
        yaml_data = getattr(law_document, "yaml_data", None)
        if yaml_data is None:
            logger.error("法令文書にYAMLデータが含まれていません")
            return "エラー: YAML構造データが利用できません"

        extractor = YamlArticleExtractor(yaml_data)
        extracted_articles = extractor.extract_articles_by_numbers(article_numbers)
        return self._format_extracted_articles(extracted_articles)

    def _format_extracted_articles(
        self, extracted_articles: List[ExtractedArticleContent]
    ) -> str:
        """Join the extracted articles under a heading line.

        Articles that were not found are prefixed with a caution marker.
        """
        formatted_parts = [" 法令本文:抽出された関連条項"]

        for article in extracted_articles:
            if article.found:
                formatted_parts.append(f"\n{article.full_content}\n")
            else:
                formatted_parts.append(f"\n【注意】{article.full_content}\n")

        return "\n".join(formatted_parts)
411
+
412
+
413
+ # 使用例とテスト
414
def test_law_extractor():
    """Smoke-test ``YamlArticleExtractor`` on a small flat ``articles`` sample.

    Builds a ``LegalDocument`` carrying sample YAML data, extracts articles
    '3' and '4' from it, checks that both were found, and prints the rendered
    result. No LLM is involved.

    Raises:
        AssertionError: If an article is missing or returned out of order.
    """
    # Article numbers are strings, matching what RelevantArticles delivers.
    sample_yaml_data = {
        "law_info": {"title": "土壌汚染対策法", "law_num": "平成14年法律第53号"},
        "articles": [
            {
                "article_num": "3",
                "title": "土壌汚染状況調査",
                "paragraphs": [
                    {
                        "paragraph_num": "1",
                        "content": "都道府県知事は、有害物質使用特定施設の使用が廃止されたときは、当該有害物質使用特定施設に係る工場又は事業場の敷地であった土地について、土壌汚染状況調査を行わせるものとする。",
                        "items": [
                            {
                                "item_num": 1,
                                "title": "一",
                                "content": "有害物質使用特定施設において製造、使用又は処理されていた物質",
                            }
                        ],
                    }
                ],
            },
            {
                "article_num": "4",
                "title": "調査命令",
                "paragraphs": [
                    {
                        "paragraph_num": "1",
                        "content": "都道府県知事は、土壌汚染により人の健康に係る被害が生ずるおそれがあるものとして環境省令で定める基準に該当する土地があると認めるときは、当該土地の所有者等に対し、土壌汚染状況調査を行うべきことを命ずることができる。",
                    }
                ],
            },
        ],
    }

    law_document = LegalDocument(
        name="土壌汚染対策法",
        content="法令の本文テキスト...",
        document_type="law",
        yaml_data=sample_yaml_data,
    )

    # Read the YAML back from the document, as the extractors in this module do.
    extractor = YamlArticleExtractor(law_document.yaml_data)
    extracted = extractor.extract_articles_by_numbers(["3", "4"])

    assert [article.article_num for article in extracted] == ["3", "4"]
    assert all(article.found for article in extracted)

    print("=== 条文抽出テスト結果 ===")
    for article in extracted:
        print(f"\n【第{article.article_num}条】")
        print(f"タイトル: {article.title}")
        print(f"見つかった: {article.found}")
        print(f"内容:\n{article.full_content}")
470
+
471
+
472
class RegulationExtractor(BaseExtractor):
    """Extract the relevant articles from the enforcement regulation.

    Works in two steps: the LLM names the relevant article numbers, then the
    text of those articles is read from the regulation's YAML structure.

    ``self.llm``, ``self.prompt_manager``, ``self.prompt_name`` and
    ``self._create_messages`` are not defined here; presumably they are
    provided by ``BaseExtractor`` (confirm in ``law_extraction``).
    """

    def extract(self, state: GraphState) -> ExtractionResult:
        """Run the two-step extraction against ``state["regulation_document"]``.

        Args:
            state: Graph state; ``regulation_document`` and ``law_document``
                are read here.

        Returns:
            The formatted article text plus metadata that records the stage,
            source document, referenced law, identified article numbers and
            the LLM's reasoning.
        """
        logger.info("施行規則からの関連条文抽出を開始")

        # Step 1: let the LLM name the relevant article numbers.
        relevant_articles = self._identify_relevant_articles(state)

        # Step 2: pull exactly those articles out of the YAML structure.
        extracted_content = self._extract_articles_from_yaml(
            state["regulation_document"], relevant_articles.article_numbers
        )

        return ExtractionResult(
            content=extracted_content,
            metadata={
                "stage": "regulation_extraction",
                "source_document": state["regulation_document"].name,
                "law_reference": state["law_document"].name,
                "identified_articles": relevant_articles.article_numbers,
                "extraction_reasoning": relevant_articles.extraction_reasoning,
            },
        )

    def _identify_relevant_articles(self, state: GraphState) -> RelevantArticles:
        """Ask the LLM, in structured-output mode, for relevant article numbers.

        The prompt context includes the law content extracted earlier
        (``state["extracted_law_content"]``).

        Args:
            state: Graph state used to build the prompt context.

        Returns:
            The structured LLM response.
        """
        logger.info("LLMによる施行規則の関連条文番号の特定を開始")

        # Keys in special_context override same-named keys from the flattened
        # state because they are merged last.
        base_context = flatten_state(state)
        special_context = {
            "law_name": state["law_document"].name,
            "law_article": ", ".join(state["target_articles"]),
            "extracted_law_content": state["extracted_law_content"],
            "regulation_text": state["regulation_document"].content,
        }
        formatted_prompt = self.prompt_manager.render_prompt(
            self.prompt_name,
            context={**base_context, **special_context},
        )
        messages = self._create_messages(formatted_prompt)

        structured_llm = self.llm.with_structured_output(RelevantArticles)
        response: RelevantArticles = structured_llm.invoke(messages)

        logger.info(f"特定された関連条文: {response.article_numbers}")
        return response

    def _extract_articles_from_yaml(
        self, regulation_document: "LegalDocument", article_numbers: List[str]
    ) -> str:
        """Read the given articles from the document's YAML data.

        Args:
            regulation_document: Document expected to carry a ``yaml_data``
                attribute.
            article_numbers: Article numbers as strings (e.g. ``'20_2'``).

        Returns:
            The formatted article text, or an error message string when the
            document has no YAML data.
        """
        logger.info(f"施行規則のYAML構造から{len(article_numbers)}件の条文を抽出中")

        # A missing attribute and an explicit None are handled the same way.
        yaml_data = getattr(regulation_document, "yaml_data", None)
        if yaml_data is None:
            logger.error("施行規則文書にYAMLデータが含まれていません")
            return "エラー: YAML構造データが利用できません"

        extractor = YamlArticleExtractor(yaml_data)
        extracted_articles = extractor.extract_articles_by_numbers(article_numbers)
        return self._format_extracted_articles(extracted_articles)

    def _format_extracted_articles(
        self, extracted_articles: List[ExtractedArticleContent]
    ) -> str:
        """Join the extracted articles under a heading line.

        Articles that were not found are prefixed with a caution marker.
        """
        formatted_parts = [" 施行規則:抽出された関連条項"]

        for article in extracted_articles:
            if article.found:
                formatted_parts.append(f"\n{article.full_content}\n")
            else:
                formatted_parts.append(f"\n【注意】{article.full_content}\n")

        return "\n".join(formatted_parts)
561
+
562
+
563
class GraphBuilder:
    """Builds and runs the LangGraph workflow for legal key-point extraction.

    The compiled graph runs ``extract_law`` -> ``extract_regulation`` ->
    ``generate_summary``. After each extraction step, a non-empty
    ``error_message`` in the state diverts the flow to ``handle_error``.
    """

    # Prompt name used by each LLM call unless overridden by the caller.
    DEFAULT_PROMPT_NAMES = {
        "extract_law": "extract_laws_v001",
        "extract_regulation": "extract_regulation_v001",
        "generate_summary": "v003",
    }

    def __init__(
        self,
        llm: BaseLLM,
        prompts_dir: Path = Path("prompts"),
        prompt_names: Optional[Dict[str, str]] = None,
    ):
        """
        Args:
            llm: Language model shared by all three steps.
            prompts_dir: Directory handed to ``PromptManager``.
            prompt_names: Optional overrides for ``DEFAULT_PROMPT_NAMES``;
                caller-supplied entries win.
        """
        self.llm = llm
        self.prompt_manager = PromptManager(prompts_dir)

        # Start from the defaults, then let caller overrides replace them.
        merged_names = dict(self.DEFAULT_PROMPT_NAMES)
        merged_names.update(prompt_names or {})
        self.prompt_names = merged_names

        manager = self.prompt_manager
        self.law_extractor = LawExtractor(
            llm, manager, self.prompt_names["extract_law"]
        )
        self.regulation_extractor = RegulationExtractor(
            llm, manager, self.prompt_names["extract_regulation"]
        )
        self.summary_generator = ViewpointGenerator(
            llm, manager, self.prompt_names["generate_summary"]
        )

        self.graph = self._build_graph()

    def _build_graph(self):  # TODO: annotate the return type as CompiledGraph
        """Wire the nodes and edges and return the compiled graph."""
        workflow = StateGraph(GraphState)

        node_handlers = (
            ("extract_law", self._extract_law_node),
            ("extract_regulation", self._extract_regulation_node),
            ("generate_summary", self._generate_summary_node),
            ("handle_error", self._handle_error_node),
        )
        for node_name, handler in node_handlers:
            workflow.add_node(node_name, handler)

        workflow.set_entry_point("extract_law")

        # (source node, router, node reached when the router says "continue")
        conditional_routes = (
            ("extract_law", self._should_continue_to_regulation, "extract_regulation"),
            ("extract_regulation", self._should_continue_to_summary, "generate_summary"),
        )
        for source, router, next_node in conditional_routes:
            workflow.add_conditional_edges(
                source,
                router,
                {"continue": next_node, "error": "handle_error"},
            )

        for terminal_node in ("generate_summary", "handle_error"):
            workflow.add_edge(terminal_node, END)

        return workflow.compile()

    def _extract_law_node(self, state: GraphState) -> GraphState:
        """Law-extraction node: store the result in ``state`` or record the error."""
        try:
            outcome = self.law_extractor.extract(state)
            state["extracted_law_content"] = outcome.content
            state["current_stage"] = ProcessingStage.LAW_EXTRACTION
            state["metadata"].update(outcome.metadata)
            identified = outcome.metadata["identified_articles"]
            state["extracted_law_article_numbers"] = identified
            logger.info("法令抽出が完了しました")
        except Exception as exc:
            # The failure is recorded in the state; the router then diverts
            # the flow to the error node.
            state["error_message"] = f"法令抽出エラー: {exc}"
            logger.error(state["error_message"])

        return state

    def _extract_regulation_node(self, state: GraphState) -> GraphState:
        """Regulation-extraction node: store the result or record the error."""
        try:
            outcome = self.regulation_extractor.extract(state)
            state["extracted_regulation_content"] = outcome.content
            state["current_stage"] = ProcessingStage.REGULATION_EXTRACTION
            state["metadata"].update(outcome.metadata)
            identified = outcome.metadata["identified_articles"]
            state["extracted_regulation_article_numbers"] = identified
            logger.info("施行規則抽出が完了しました")
        except Exception as exc:
            state["error_message"] = f"施行規則抽出エラー: {exc}"
            logger.error(state["error_message"])

        return state

    def _generate_summary_node(self, state: GraphState) -> GraphState:
        """Summary node: store the final summary or record the error."""
        try:
            outcome = self.summary_generator.extract(state)
            state["final_summary"] = outcome.content
            state["current_stage"] = ProcessingStage.COMPLETED
            state["metadata"].update(outcome.metadata)
            logger.info("要点生成が完了しました")
        except Exception as exc:
            state["error_message"] = f"要点生成エラー: {exc}"
            logger.error(state["error_message"])

        return state

    def _handle_error_node(self, state: GraphState) -> GraphState:
        """Error node: log the recorded error and mark the run as completed."""
        reason = state.get("error_message", "不明なエラー")
        logger.error(f"処理中にエラーが発生しました: {reason}")
        state["current_stage"] = ProcessingStage.COMPLETED
        return state

    def _should_continue_to_regulation(self, state: GraphState) -> str:
        """Route after law extraction: ``"error"`` if an error was recorded."""
        if state.get("error_message"):
            return "error"
        return "continue"

    def _should_continue_to_summary(self, state: GraphState) -> str:
        """Route after regulation extraction: ``"error"`` if an error was recorded."""
        if state.get("error_message"):
            return "error"
        return "continue"

    def process(
        self,
        law_document: LegalDocument,
        regulation_document: LegalDocument,
        target_articles: List[str],
    ) -> GraphState:
        """Run the compiled graph on the given documents.

        Args:
            law_document: The law to analyse.
            regulation_document: The matching enforcement regulation.
            target_articles: Article numbers of the law to focus on.

        Returns:
            The state returned by the graph invocation.
        """
        # Every result slot starts empty; the nodes fill them in as they run.
        initial_state = GraphState(
            law_document=law_document,
            regulation_document=regulation_document,
            target_articles=target_articles,
            extracted_law_content=None,
            extracted_law_article_numbers=None,
            extracted_regulation_content=None,
            extracted_regulation_article_numbers=None,
            final_summary=None,
            current_stage=ProcessingStage.LAW_EXTRACTION,
            error_message=None,
            metadata={},
        )

        logger.info("法令判断軸抽出処理を開始します")
        final_state = self.graph.invoke(initial_state)
        logger.info(f"処理が完了しました。ステージ: {final_state['current_stage']}")

        return final_state
719
+
720
+
721
class LegalExtractionConfig:
    """Plain settings holder consumed by ``create_legal_extraction_system``.

    Attributes:
        llm: Language model object, stored as given.
        prompts_dir: Prompt directory, converted to a ``Path``.
        prompt_names: Optional prompt-name overrides, stored as given.
    """

    def __init__(
        self,
        llm,
        prompts_dir: str = "prompts",
        prompt_names: Optional[Dict[str, str]] = None,
    ):
        """
        Args:
            llm: Language model object.
            prompts_dir: Directory that holds the prompt templates.
            prompt_names: Optional mapping of prompt-name overrides.
        """
        directory = Path(prompts_dir)

        self.llm = llm
        self.prompts_dir = directory
        self.prompt_names = prompt_names
733
+
734
+
735
def create_legal_extraction_system(config: LegalExtractionConfig) -> GraphBuilder:
    """Factory: build a ``GraphBuilder`` from a ``LegalExtractionConfig``.

    Args:
        config: Settings whose ``llm``, ``prompts_dir`` and ``prompt_names``
            are forwarded unchanged.

    Returns:
        A ``GraphBuilder`` constructed from those settings.
    """
    builder_kwargs = {
        "llm": config.llm,
        "prompts_dir": config.prompts_dir,
        "prompt_names": config.prompt_names,
    }
    return GraphBuilder(**builder_kwargs)