tech-hub-skills 1.2.0 → 1.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198) hide show
  1. package/{LICENSE → .claude/LICENSE} +21 -21
  2. package/.claude/README.md +291 -0
  3. package/.claude/bin/cli.js +266 -0
  4. package/{bin → .claude/bin}/copilot.js +182 -182
  5. package/{bin → .claude/bin}/postinstall.js +42 -42
  6. package/{tech_hub_skills/skills → .claude/commands}/README.md +336 -336
  7. package/{tech_hub_skills/skills → .claude/commands}/ai-engineer.md +104 -104
  8. package/{tech_hub_skills/skills → .claude/commands}/aws.md +143 -143
  9. package/{tech_hub_skills/skills → .claude/commands}/azure.md +149 -149
  10. package/{tech_hub_skills/skills → .claude/commands}/backend-developer.md +108 -108
  11. package/{tech_hub_skills/skills → .claude/commands}/code-review.md +399 -399
  12. package/{tech_hub_skills/skills → .claude/commands}/compliance-automation.md +747 -747
  13. package/{tech_hub_skills/skills → .claude/commands}/compliance-officer.md +108 -108
  14. package/{tech_hub_skills/skills → .claude/commands}/data-engineer.md +113 -113
  15. package/{tech_hub_skills/skills → .claude/commands}/data-governance.md +102 -102
  16. package/{tech_hub_skills/skills → .claude/commands}/data-scientist.md +123 -123
  17. package/{tech_hub_skills/skills → .claude/commands}/database-admin.md +109 -109
  18. package/{tech_hub_skills/skills → .claude/commands}/devops.md +160 -160
  19. package/{tech_hub_skills/skills → .claude/commands}/docker.md +160 -160
  20. package/{tech_hub_skills/skills → .claude/commands}/enterprise-dashboard.md +613 -613
  21. package/{tech_hub_skills/skills → .claude/commands}/finops.md +184 -184
  22. package/{tech_hub_skills/skills → .claude/commands}/frontend-developer.md +108 -108
  23. package/{tech_hub_skills/skills → .claude/commands}/gcp.md +143 -143
  24. package/{tech_hub_skills/skills → .claude/commands}/ml-engineer.md +115 -115
  25. package/{tech_hub_skills/skills → .claude/commands}/mlops.md +187 -187
  26. package/{tech_hub_skills/skills → .claude/commands}/network-engineer.md +109 -109
  27. package/{tech_hub_skills/skills → .claude/commands}/optimization-advisor.md +329 -329
  28. package/{tech_hub_skills/skills → .claude/commands}/orchestrator.md +623 -623
  29. package/{tech_hub_skills/skills → .claude/commands}/platform-engineer.md +102 -102
  30. package/{tech_hub_skills/skills → .claude/commands}/process-automation.md +226 -226
  31. package/{tech_hub_skills/skills → .claude/commands}/process-changelog.md +184 -184
  32. package/{tech_hub_skills/skills → .claude/commands}/process-documentation.md +484 -484
  33. package/{tech_hub_skills/skills → .claude/commands}/process-kanban.md +324 -324
  34. package/{tech_hub_skills/skills → .claude/commands}/process-versioning.md +214 -214
  35. package/{tech_hub_skills/skills → .claude/commands}/product-designer.md +104 -104
  36. package/{tech_hub_skills/skills → .claude/commands}/project-starter.md +443 -443
  37. package/{tech_hub_skills/skills → .claude/commands}/qa-engineer.md +109 -109
  38. package/{tech_hub_skills/skills → .claude/commands}/security-architect.md +135 -135
  39. package/{tech_hub_skills/skills → .claude/commands}/sre.md +109 -109
  40. package/{tech_hub_skills/skills → .claude/commands}/system-design.md +126 -126
  41. package/{tech_hub_skills/skills → .claude/commands}/technical-writer.md +101 -101
  42. package/.claude/package.json +46 -0
  43. package/{tech_hub_skills → .claude}/roles/ai-engineer/skills/01-prompt-engineering/README.md +252 -252
  44. package/.claude/roles/ai-engineer/skills/01-prompt-engineering/prompt_ab_tester.py +356 -0
  45. package/.claude/roles/ai-engineer/skills/01-prompt-engineering/prompt_template_manager.py +274 -0
  46. package/.claude/roles/ai-engineer/skills/01-prompt-engineering/token_cost_estimator.py +324 -0
  47. package/{tech_hub_skills → .claude}/roles/ai-engineer/skills/02-rag-pipeline/README.md +448 -448
  48. package/.claude/roles/ai-engineer/skills/02-rag-pipeline/document_chunker.py +336 -0
  49. package/.claude/roles/ai-engineer/skills/02-rag-pipeline/rag_pipeline.sql +213 -0
  50. package/{tech_hub_skills → .claude}/roles/ai-engineer/skills/03-agent-orchestration/README.md +599 -599
  51. package/{tech_hub_skills → .claude}/roles/ai-engineer/skills/04-llm-guardrails/README.md +735 -735
  52. package/{tech_hub_skills → .claude}/roles/ai-engineer/skills/05-vector-embeddings/README.md +711 -711
  53. package/{tech_hub_skills → .claude}/roles/ai-engineer/skills/06-llm-evaluation/README.md +777 -777
  54. package/{tech_hub_skills → .claude}/roles/azure/skills/01-infrastructure-fundamentals/README.md +264 -264
  55. package/{tech_hub_skills → .claude}/roles/azure/skills/02-data-factory/README.md +264 -264
  56. package/{tech_hub_skills → .claude}/roles/azure/skills/03-synapse-analytics/README.md +264 -264
  57. package/{tech_hub_skills → .claude}/roles/azure/skills/04-databricks/README.md +264 -264
  58. package/{tech_hub_skills → .claude}/roles/azure/skills/05-functions/README.md +264 -264
  59. package/{tech_hub_skills → .claude}/roles/azure/skills/06-kubernetes-service/README.md +264 -264
  60. package/{tech_hub_skills → .claude}/roles/azure/skills/07-openai-service/README.md +264 -264
  61. package/{tech_hub_skills → .claude}/roles/azure/skills/08-machine-learning/README.md +264 -264
  62. package/{tech_hub_skills → .claude}/roles/azure/skills/09-storage-adls/README.md +264 -264
  63. package/{tech_hub_skills → .claude}/roles/azure/skills/10-networking/README.md +264 -264
  64. package/{tech_hub_skills → .claude}/roles/azure/skills/11-sql-cosmos/README.md +264 -264
  65. package/{tech_hub_skills → .claude}/roles/azure/skills/12-event-hubs/README.md +264 -264
  66. package/{tech_hub_skills → .claude}/roles/code-review/skills/01-automated-code-review/README.md +394 -394
  67. package/{tech_hub_skills → .claude}/roles/code-review/skills/02-pr-review-workflow/README.md +427 -427
  68. package/{tech_hub_skills → .claude}/roles/code-review/skills/03-code-quality-gates/README.md +518 -518
  69. package/{tech_hub_skills → .claude}/roles/code-review/skills/04-reviewer-assignment/README.md +504 -504
  70. package/{tech_hub_skills → .claude}/roles/code-review/skills/05-review-analytics/README.md +540 -540
  71. package/{tech_hub_skills → .claude}/roles/data-engineer/skills/01-lakehouse-architecture/README.md +550 -550
  72. package/.claude/roles/data-engineer/skills/01-lakehouse-architecture/bronze_ingestion.py +337 -0
  73. package/.claude/roles/data-engineer/skills/01-lakehouse-architecture/medallion_queries.sql +300 -0
  74. package/{tech_hub_skills → .claude}/roles/data-engineer/skills/02-etl-pipeline/README.md +580 -580
  75. package/{tech_hub_skills → .claude}/roles/data-engineer/skills/03-data-quality/README.md +579 -579
  76. package/{tech_hub_skills → .claude}/roles/data-engineer/skills/04-streaming-pipelines/README.md +608 -608
  77. package/{tech_hub_skills → .claude}/roles/data-engineer/skills/05-performance-optimization/README.md +547 -547
  78. package/{tech_hub_skills → .claude}/roles/data-governance/skills/01-data-catalog/README.md +112 -112
  79. package/{tech_hub_skills → .claude}/roles/data-governance/skills/02-data-lineage/README.md +129 -129
  80. package/{tech_hub_skills → .claude}/roles/data-governance/skills/03-data-quality-framework/README.md +182 -182
  81. package/{tech_hub_skills → .claude}/roles/data-governance/skills/04-access-control/README.md +39 -39
  82. package/{tech_hub_skills → .claude}/roles/data-governance/skills/05-master-data-management/README.md +40 -40
  83. package/{tech_hub_skills → .claude}/roles/data-governance/skills/06-compliance-privacy/README.md +46 -46
  84. package/{tech_hub_skills → .claude}/roles/data-scientist/skills/01-eda-automation/README.md +230 -230
  85. package/.claude/roles/data-scientist/skills/01-eda-automation/eda_generator.py +446 -0
  86. package/{tech_hub_skills → .claude}/roles/data-scientist/skills/02-statistical-modeling/README.md +264 -264
  87. package/{tech_hub_skills → .claude}/roles/data-scientist/skills/03-feature-engineering/README.md +264 -264
  88. package/{tech_hub_skills → .claude}/roles/data-scientist/skills/04-predictive-modeling/README.md +264 -264
  89. package/{tech_hub_skills → .claude}/roles/data-scientist/skills/05-customer-analytics/README.md +264 -264
  90. package/{tech_hub_skills → .claude}/roles/data-scientist/skills/06-campaign-analysis/README.md +264 -264
  91. package/{tech_hub_skills → .claude}/roles/data-scientist/skills/07-experimentation/README.md +264 -264
  92. package/{tech_hub_skills → .claude}/roles/data-scientist/skills/08-data-visualization/README.md +264 -264
  93. package/{tech_hub_skills → .claude}/roles/devops/skills/01-cicd-pipeline/README.md +264 -264
  94. package/{tech_hub_skills → .claude}/roles/devops/skills/02-container-orchestration/README.md +264 -264
  95. package/{tech_hub_skills → .claude}/roles/devops/skills/03-infrastructure-as-code/README.md +264 -264
  96. package/{tech_hub_skills → .claude}/roles/devops/skills/04-gitops/README.md +264 -264
  97. package/{tech_hub_skills → .claude}/roles/devops/skills/05-environment-management/README.md +264 -264
  98. package/{tech_hub_skills → .claude}/roles/devops/skills/06-automated-testing/README.md +264 -264
  99. package/{tech_hub_skills → .claude}/roles/devops/skills/07-release-management/README.md +264 -264
  100. package/{tech_hub_skills → .claude}/roles/devops/skills/08-monitoring-alerting/README.md +264 -264
  101. package/{tech_hub_skills → .claude}/roles/devops/skills/09-devsecops/README.md +265 -265
  102. package/{tech_hub_skills → .claude}/roles/finops/skills/01-cost-visibility/README.md +264 -264
  103. package/{tech_hub_skills → .claude}/roles/finops/skills/02-resource-tagging/README.md +264 -264
  104. package/{tech_hub_skills → .claude}/roles/finops/skills/03-budget-management/README.md +264 -264
  105. package/{tech_hub_skills → .claude}/roles/finops/skills/04-reserved-instances/README.md +264 -264
  106. package/{tech_hub_skills → .claude}/roles/finops/skills/05-spot-optimization/README.md +264 -264
  107. package/{tech_hub_skills → .claude}/roles/finops/skills/06-storage-tiering/README.md +264 -264
  108. package/{tech_hub_skills → .claude}/roles/finops/skills/07-compute-rightsizing/README.md +264 -264
  109. package/{tech_hub_skills → .claude}/roles/finops/skills/08-chargeback/README.md +264 -264
  110. package/{tech_hub_skills → .claude}/roles/ml-engineer/skills/01-mlops-pipeline/README.md +566 -566
  111. package/{tech_hub_skills → .claude}/roles/ml-engineer/skills/02-feature-engineering/README.md +655 -655
  112. package/{tech_hub_skills → .claude}/roles/ml-engineer/skills/03-model-training/README.md +704 -704
  113. package/{tech_hub_skills → .claude}/roles/ml-engineer/skills/04-model-serving/README.md +845 -845
  114. package/{tech_hub_skills → .claude}/roles/ml-engineer/skills/05-model-monitoring/README.md +874 -874
  115. package/{tech_hub_skills → .claude}/roles/mlops/skills/01-ml-pipeline-orchestration/README.md +264 -264
  116. package/{tech_hub_skills → .claude}/roles/mlops/skills/02-experiment-tracking/README.md +264 -264
  117. package/{tech_hub_skills → .claude}/roles/mlops/skills/03-model-registry/README.md +264 -264
  118. package/{tech_hub_skills → .claude}/roles/mlops/skills/04-feature-store/README.md +264 -264
  119. package/{tech_hub_skills → .claude}/roles/mlops/skills/05-model-deployment/README.md +264 -264
  120. package/{tech_hub_skills → .claude}/roles/mlops/skills/06-model-observability/README.md +264 -264
  121. package/{tech_hub_skills → .claude}/roles/mlops/skills/07-data-versioning/README.md +264 -264
  122. package/{tech_hub_skills → .claude}/roles/mlops/skills/08-ab-testing/README.md +264 -264
  123. package/{tech_hub_skills → .claude}/roles/mlops/skills/09-automated-retraining/README.md +264 -264
  124. package/{tech_hub_skills → .claude}/roles/platform-engineer/skills/01-internal-developer-platform/README.md +153 -153
  125. package/{tech_hub_skills → .claude}/roles/platform-engineer/skills/02-self-service-infrastructure/README.md +57 -57
  126. package/{tech_hub_skills → .claude}/roles/platform-engineer/skills/03-slo-sli-management/README.md +59 -59
  127. package/{tech_hub_skills → .claude}/roles/platform-engineer/skills/04-developer-experience/README.md +57 -57
  128. package/{tech_hub_skills → .claude}/roles/platform-engineer/skills/05-incident-management/README.md +73 -73
  129. package/{tech_hub_skills → .claude}/roles/platform-engineer/skills/06-capacity-management/README.md +59 -59
  130. package/{tech_hub_skills → .claude}/roles/product-designer/skills/01-requirements-discovery/README.md +407 -407
  131. package/{tech_hub_skills → .claude}/roles/product-designer/skills/02-user-research/README.md +382 -382
  132. package/{tech_hub_skills → .claude}/roles/product-designer/skills/03-brainstorming-ideation/README.md +437 -437
  133. package/{tech_hub_skills → .claude}/roles/product-designer/skills/04-ux-design/README.md +496 -496
  134. package/{tech_hub_skills → .claude}/roles/product-designer/skills/05-product-market-fit/README.md +376 -376
  135. package/{tech_hub_skills → .claude}/roles/product-designer/skills/06-stakeholder-management/README.md +412 -412
  136. package/{tech_hub_skills → .claude}/roles/security-architect/skills/01-pii-detection/README.md +319 -319
  137. package/{tech_hub_skills → .claude}/roles/security-architect/skills/02-threat-modeling/README.md +264 -264
  138. package/{tech_hub_skills → .claude}/roles/security-architect/skills/03-infrastructure-security/README.md +264 -264
  139. package/{tech_hub_skills → .claude}/roles/security-architect/skills/04-iam/README.md +264 -264
  140. package/{tech_hub_skills → .claude}/roles/security-architect/skills/05-application-security/README.md +264 -264
  141. package/{tech_hub_skills → .claude}/roles/security-architect/skills/06-secrets-management/README.md +264 -264
  142. package/{tech_hub_skills → .claude}/roles/security-architect/skills/07-security-monitoring/README.md +264 -264
  143. package/{tech_hub_skills → .claude}/roles/system-design/skills/01-architecture-patterns/README.md +337 -337
  144. package/{tech_hub_skills → .claude}/roles/system-design/skills/02-requirements-engineering/README.md +264 -264
  145. package/{tech_hub_skills → .claude}/roles/system-design/skills/03-scalability/README.md +264 -264
  146. package/{tech_hub_skills → .claude}/roles/system-design/skills/04-high-availability/README.md +264 -264
  147. package/{tech_hub_skills → .claude}/roles/system-design/skills/05-cost-optimization-design/README.md +264 -264
  148. package/{tech_hub_skills → .claude}/roles/system-design/skills/06-api-design/README.md +264 -264
  149. package/{tech_hub_skills → .claude}/roles/system-design/skills/07-observability-architecture/README.md +264 -264
  150. package/{tech_hub_skills → .claude}/roles/system-design/skills/08-process-automation/PROCESS_TEMPLATE.md +336 -336
  151. package/{tech_hub_skills → .claude}/roles/system-design/skills/08-process-automation/README.md +521 -521
  152. package/.claude/roles/system-design/skills/08-process-automation/ai_prompt_generator.py +744 -0
  153. package/.claude/roles/system-design/skills/08-process-automation/automation_recommender.py +688 -0
  154. package/.claude/roles/system-design/skills/08-process-automation/plan_generator.py +679 -0
  155. package/.claude/roles/system-design/skills/08-process-automation/process_analyzer.py +528 -0
  156. package/.claude/roles/system-design/skills/08-process-automation/process_parser.py +684 -0
  157. package/.claude/roles/system-design/skills/08-process-automation/role_matcher.py +615 -0
  158. package/.claude/skills/README.md +336 -0
  159. package/.claude/skills/ai-engineer.md +104 -0
  160. package/.claude/skills/aws.md +143 -0
  161. package/.claude/skills/azure.md +149 -0
  162. package/.claude/skills/backend-developer.md +108 -0
  163. package/.claude/skills/code-review.md +399 -0
  164. package/.claude/skills/compliance-automation.md +747 -0
  165. package/.claude/skills/compliance-officer.md +108 -0
  166. package/.claude/skills/data-engineer.md +113 -0
  167. package/.claude/skills/data-governance.md +102 -0
  168. package/.claude/skills/data-scientist.md +123 -0
  169. package/.claude/skills/database-admin.md +109 -0
  170. package/.claude/skills/devops.md +160 -0
  171. package/.claude/skills/docker.md +160 -0
  172. package/.claude/skills/enterprise-dashboard.md +613 -0
  173. package/.claude/skills/finops.md +184 -0
  174. package/.claude/skills/frontend-developer.md +108 -0
  175. package/.claude/skills/gcp.md +143 -0
  176. package/.claude/skills/ml-engineer.md +115 -0
  177. package/.claude/skills/mlops.md +187 -0
  178. package/.claude/skills/network-engineer.md +109 -0
  179. package/.claude/skills/optimization-advisor.md +329 -0
  180. package/.claude/skills/orchestrator.md +623 -0
  181. package/.claude/skills/platform-engineer.md +102 -0
  182. package/.claude/skills/process-automation.md +226 -0
  183. package/.claude/skills/process-changelog.md +184 -0
  184. package/.claude/skills/process-documentation.md +484 -0
  185. package/.claude/skills/process-kanban.md +324 -0
  186. package/.claude/skills/process-versioning.md +214 -0
  187. package/.claude/skills/product-designer.md +104 -0
  188. package/.claude/skills/project-starter.md +443 -0
  189. package/.claude/skills/qa-engineer.md +109 -0
  190. package/.claude/skills/security-architect.md +135 -0
  191. package/.claude/skills/sre.md +109 -0
  192. package/.claude/skills/system-design.md +126 -0
  193. package/.claude/skills/technical-writer.md +101 -0
  194. package/.gitattributes +2 -0
  195. package/GITHUB_COPILOT.md +106 -0
  196. package/README.md +192 -291
  197. package/package.json +16 -46
  198. package/bin/cli.js +0 -241
@@ -0,0 +1,684 @@
1
+ """
2
+ Process Parser - Parses natural language process descriptions for AI-driven automation.
3
+
4
+ Part of the Tech Hub Skills Library (sd-08: Process Automation).
5
+
6
+ This module enables dynamic process analysis by parsing unstructured or
7
+ semi-structured process descriptions written in natural language or markdown.
8
+ Designed to work seamlessly with AI assistants like VS Code GitHub Copilot.
9
+ """
10
+
11
+ import re
12
+ from dataclasses import dataclass, field
13
+ from typing import List, Dict, Optional, Tuple, Any
14
+ from enum import Enum
15
+ import json
16
+
17
+
18
class DocumentFormat(Enum):
    """Layouts a process description may be written in.

    Each member's value is the lowercase string form of its name.
    """

    # Text carrying headings or list markers.
    MARKDOWN = "markdown"
    # Free-form prose without markup.
    PLAIN_TEXT = "plain_text"
    # Declared for callers; ProcessParser.parse has no dedicated branch for
    # it, so it is handled by the plain-text path.
    STRUCTURED = "structured"
    # Markdown preceded by a '---' delimited metadata header.
    YAML_FRONTMATTER = "yaml_frontmatter"
24
+
25
+
26
@dataclass
class ParsedStep:
    """Single process step recovered from a natural language description.

    Only ``name`` and ``description`` are required. Every list field starts
    as its own empty list, so instances never share default containers.
    """

    # Short label for the step.
    name: str
    # Full text the step was derived from.
    description: str
    # Duration text as written in the source (e.g. "30 minutes"), if any.
    estimated_time: Optional[str] = None
    # Steps are considered manual unless stated otherwise.
    is_manual: bool = True
    # Tool names found in the step text.
    tools_mentioned: List[str] = field(default_factory=list)
    # Data source names found in the step text.
    data_sources: List[str] = field(default_factory=list)
    # Pain-point phrases found in the step text.
    pain_points: List[str] = field(default_factory=list)
    # Phrases suggesting the step could be automated.
    automation_hints: List[str] = field(default_factory=list)
    # Position of the step within its process; 0 when not assigned.
    sequence_number: int = 0
38
+
39
+
40
@dataclass
class ParsedProcess:
    """Complete parsed process from natural language.

    Holds the process-level facts (name, description, frequency,
    stakeholders, goals, constraints, pain points, tools, data sources), the
    ordered steps, the untouched input text and a confidence score for the
    parse. ``to_analyzer_input`` and ``to_json`` export the data as a dict
    and as a JSON string respectively.
    """

    name: str
    description: str
    # Forward reference: keeps this class definable independently of where
    # ParsedStep is declared.
    steps: List["ParsedStep"]
    stakeholders: List[str]
    frequency: str
    pain_points: List[str]
    current_tools: List[str]
    data_sources: List[str]
    goals: List[str]
    constraints: List[str]
    raw_text: str
    confidence_score: float  # How confident we are in the parsing

    def to_analyzer_input(self) -> Dict:
        """Convert to the dict layout intended for ProcessAnalyzer.

        ProcessAnalyzer is not defined in this module; the layout below is
        the contract this method has always produced.

        Returns:
            A dict with ``name``, ``description``, ``steps``, ``frequency``
            and ``stakeholders``. Each step entry carries its duration in
            minutes plus boolean flags derived from the step text.
        """
        expertise_keywords = ("expertise", "experienced", "specialist", "complex", "judgment")
        decision_keywords = ("decide", "decision", "choose", "evaluate", "assess", "determine")

        analyzer_steps = []
        for step in self.steps:
            lowered = step.description.lower()
            analyzer_steps.append({
                "name": step.name,
                "description": step.description,
                "time_minutes": self._parse_time(step.estimated_time),
                "manual": step.is_manual,
                "tools_used": step.tools_mentioned,
                "data_sources": step.data_sources,
                # Any recorded pain point marks the step as error prone.
                "error_prone": len(step.pain_points) > 0,
                "requires_expertise": any(kw in lowered for kw in expertise_keywords),
                "requires_decision": any(kw in lowered for kw in decision_keywords),
            })

        return {
            "name": self.name,
            "description": self.description,
            "steps": analyzer_steps,
            "frequency": self.frequency,
            "stakeholders": self.stakeholders
        }

    def _parse_time(self, time_str: Optional[str]) -> float:
        """Parse a duration string to minutes.

        The first number in the string is the magnitude. Its unit is the
        word that directly follows it ("15 minutes per day" is 15 minutes,
        not 15 workdays). When no unit follows the number directly (for
        example "2-3 hours"), the whole string is scanned for a unit word
        instead. A number with no recognisable unit is taken as minutes.

        Args:
            time_str: Duration text such as "2 hours" or "30 min"; may be
                None or empty.

        Returns:
            Minutes as a float. 30.0 when the input is empty or holds no
            number. A day counts as an 8 hour workday (480 minutes).
        """
        default_minutes = 30.0
        if not time_str:
            return default_minutes

        text = time_str.lower()
        number = re.search(r'\d+(?:\.\d+)?', text)
        if number is None:
            return default_minutes
        value = float(number.group(0))

        def to_minutes(unit: str) -> float:
            if unit in ('hour', 'hr'):
                return value * 60
            if unit == 'day':
                return value * 480  # 8 hour workday
            if unit == 'sec':
                return value / 60
            return value  # 'min'

        # Checked in this order by the fallback scan below.
        unit_prefixes = ('hour', 'hr', 'day', 'min', 'sec')

        # Preferred: the unit written immediately after the number.
        following = text[number.end():].lstrip()
        for unit in unit_prefixes:
            if following.startswith(unit):
                return to_minutes(unit)

        # Fallback: any unit word anywhere in the string.
        for unit in unit_prefixes:
            if unit in text:
                return to_minutes(unit)

        return value  # Assume minutes

    def to_json(self) -> str:
        """Serialise the process to an indented JSON string for AI consumption.

        Returns:
            JSON text with three top-level keys: ``process`` (process-level
            fields), ``steps`` (one object per step) and
            ``parsing_confidence``.
        """
        process_info = {
            "name": self.name,
            "description": self.description,
            "frequency": self.frequency,
            "stakeholders": self.stakeholders,
            "goals": self.goals,
            "constraints": self.constraints,
            "pain_points": self.pain_points,
            "current_tools": self.current_tools,
            "data_sources": self.data_sources
        }
        step_entries = [
            {
                "sequence": s.sequence_number,
                "name": s.name,
                "description": s.description,
                "estimated_time": s.estimated_time,
                "is_manual": s.is_manual,
                "tools": s.tools_mentioned,
                "data_sources": s.data_sources,
                "pain_points": s.pain_points,
                "automation_hints": s.automation_hints
            }
            for s in self.steps
        ]
        return json.dumps({
            "process": process_info,
            "steps": step_entries,
            "parsing_confidence": self.confidence_score
        }, indent=2)
141
+
142
+
143
+ class ProcessParser:
144
+ """
145
+ Parses natural language process descriptions into structured data.
146
+
147
+ Designed for dynamic, AI-driven automation discovery. Users can write
148
+ process documentation in natural language, and this parser extracts
149
+ the structured information needed for automation analysis.
150
+
151
+ Works seamlessly with VS Code GitHub Copilot and other AI assistants.
152
+ """
153
+
154
+ # Keywords for identifying different aspects
155
+ FREQUENCY_KEYWORDS = {
156
+ "hourly": ["hourly", "every hour", "each hour"],
157
+ "daily": ["daily", "every day", "each day", "once a day"],
158
+ "weekly": ["weekly", "every week", "each week", "once a week"],
159
+ "bi-weekly": ["bi-weekly", "every two weeks", "fortnightly"],
160
+ "monthly": ["monthly", "every month", "each month", "once a month"],
161
+ "quarterly": ["quarterly", "every quarter", "each quarter"],
162
+ "annually": ["annually", "yearly", "every year", "once a year"],
163
+ "ad-hoc": ["ad-hoc", "as needed", "on demand", "when required"]
164
+ }
165
+
166
+ TOOL_PATTERNS = [
167
+ r'\b(Excel|Word|PowerPoint|Outlook|Teams|SharePoint)\b',
168
+ r'\b(Salesforce|SAP|Oracle|Workday|ServiceNow)\b',
169
+ r'\b(Jira|Confluence|Slack|Asana|Trello|Monday)\b',
170
+ r'\b(Power BI|Tableau|Looker|Qlik)\b',
171
+ r'\b(Python|SQL|R|JavaScript|VBA)\b',
172
+ r'\b(Azure|AWS|GCP|Databricks|Snowflake)\b',
173
+ r'\b(API|REST|GraphQL|webhook)\b',
174
+ r'\b(email|Email|e-mail)\b',
175
+ r'\b(PDF|CSV|JSON|XML)\b',
176
+ ]
177
+
178
+ DATA_SOURCE_PATTERNS = [
179
+ r'\b(database|DB|data warehouse|data lake)\b',
180
+ r'\b(CRM|ERP|HRIS|HCM)\b',
181
+ r'\b(spreadsheet|worksheet|workbook)\b',
182
+ r'\b(report|dashboard|analytics)\b',
183
+ r'\b(file|folder|directory|share)\b',
184
+ r'\b(API|web service|endpoint)\b',
185
+ r'\b(email|inbox|mailbox)\b',
186
+ ]
187
+
188
+ PAIN_POINT_INDICATORS = [
189
+ r'(takes too long|time-consuming|tedious)',
190
+ r'(error-prone|mistakes|errors|inaccurate)',
191
+ r'(manual|manually|by hand)',
192
+ r'(repetitive|boring|mundane)',
193
+ r'(bottleneck|delays?|waiting)',
194
+ r'(frustrating|painful|difficult)',
195
+ r'(copy.?paste|copying|duplicate)',
196
+ r'(inconsistent|varies|depends on)',
197
+ ]
198
+
199
+ AUTOMATION_HINT_PATTERNS = [
200
+ r'(could be automated|should automate|needs automation)',
201
+ r'(same steps|repeated|routine)',
202
+ r'(template|standardized|consistent)',
203
+ r'(rule-based|if-then|conditions?)',
204
+ r'(schedule|scheduled|recurring)',
205
+ r'(notify|notification|alert)',
206
+ r'(extract|transform|load|ETL)',
207
+ r'(validate|validation|check)',
208
+ ]
209
+
210
+ def __init__(self):
211
+ """Initialize the parser."""
212
+ self._compile_patterns()
213
+
214
+ def _compile_patterns(self):
215
+ """Compile regex patterns for efficiency."""
216
+ self.tool_regex = [re.compile(p, re.IGNORECASE) for p in self.TOOL_PATTERNS]
217
+ self.data_source_regex = [re.compile(p, re.IGNORECASE) for p in self.DATA_SOURCE_PATTERNS]
218
+ self.pain_point_regex = [re.compile(p, re.IGNORECASE) for p in self.PAIN_POINT_INDICATORS]
219
+ self.automation_hint_regex = [re.compile(p, re.IGNORECASE) for p in self.AUTOMATION_HINT_PATTERNS]
220
+
221
def parse(self, text: str, format_hint: Optional[DocumentFormat] = None) -> ParsedProcess:
    """
    Turn a natural language process description into a ParsedProcess.

    Args:
        text: The process description text
        format_hint: Format to assume; when omitted (or falsy) the format
            is detected from the text itself

    Returns:
        ParsedProcess with extracted information
    """
    if format_hint:
        doc_format = format_hint
    else:
        doc_format = self._detect_format(text)

    # Dedicated parsers exist for front matter and Markdown only.
    if doc_format == DocumentFormat.YAML_FRONTMATTER:
        return self._parse_yaml_frontmatter(text)
    if doc_format == DocumentFormat.MARKDOWN:
        return self._parse_markdown(text)

    # Every other format, including STRUCTURED, is read as plain text.
    return self._parse_plain_text(text)
242
+
243
def _detect_format(self, text: str) -> DocumentFormat:
    """Guess the document format from markers present in the text.

    Text whose first non-whitespace characters are ``---`` is treated as
    YAML front matter. Otherwise any line starting with a ``#`` heading, a
    numbered item (``1. ``) or a ``-``/``*`` bullet makes it Markdown.
    Anything else is plain text.
    """
    if text.strip().startswith('---'):
        return DocumentFormat.YAML_FRONTMATTER

    # Heading markers first, then list markers; either one is enough.
    markdown_markers = (r'^#+\s', r'^\d+\.\s|^-\s|^\*\s')
    if any(re.search(marker, text, re.MULTILINE) for marker in markdown_markers):
        return DocumentFormat.MARKDOWN

    return DocumentFormat.PLAIN_TEXT
253
+
254
def _parse_markdown(self, text: str) -> ParsedProcess:
    """Build a ParsedProcess from a Markdown process description.

    The title is the first line starting with ``#``. Description, goals
    and constraints are read from sections looked up by lowercase heading
    (with one alternative heading each), and stakeholders from the
    ``stakeholders`` section. Frequency, pain points, tools and data
    sources are extracted from the whole text.
    """
    # Title: first line that opens with '#', stripped of its marker.
    first_heading = next(
        (candidate for candidate in text.split('\n') if candidate.startswith('#')),
        None,
    )
    if first_heading is None:
        name = "Untitled Process"
    else:
        name = re.sub(r'^#+\s*', '', first_heading).strip()

    sections = self._extract_sections(text)

    def first_present(*headings):
        # Content of the first listed heading that exists, else ''.
        for heading in headings:
            if heading in sections:
                return sections[heading]
        return ''

    steps = self._extract_steps_markdown(text, sections)

    description = first_present('overview', 'description')
    stakeholders = self._extract_list_items(sections.get('stakeholders', ''))
    frequency = self._detect_frequency(text)
    pain_points = self._extract_pain_points(text)
    current_tools = self._extract_tools(text)
    data_sources = self._extract_data_sources(text)
    goals = self._extract_list_items(first_present('goals', 'objectives'))
    constraints = self._extract_list_items(first_present('constraints', 'limitations'))

    # Confidence is computed from the description as extracted, before any
    # normalisation below.
    confidence = self._calculate_confidence(steps, stakeholders, description)

    if not isinstance(description, str):
        description = ' '.join(description)

    return ParsedProcess(
        name=name,
        description=description,
        steps=steps,
        stakeholders=stakeholders,
        frequency=frequency,
        pain_points=pain_points,
        current_tools=current_tools,
        data_sources=data_sources,
        goals=goals,
        constraints=constraints,
        raw_text=text,
        confidence_score=confidence
    )
298
+
299
def _parse_yaml_frontmatter(self, text: str) -> ParsedProcess:
    """Parse Markdown that may start with a ``---`` delimited metadata header.

    The header is handed to ``_parse_simple_yaml`` and the remainder is
    parsed as Markdown. Non-empty ``name``, ``frequency`` and
    ``stakeholders`` values from the header replace the parsed ones; a
    single stakeholder value is wrapped in a list.

    A header is only recognised when nothing but whitespace precedes the
    first ``---``. Otherwise the whole text is parsed as Markdown, so
    ``---`` markers used later in a document (for example as horizontal
    rules) never cause content to be discarded.
    """
    # Split frontmatter from content
    parts = text.split('---', 2)
    # parts[0] is whatever precedes the first delimiter; real front matter
    # opens the document, so that part must be blank.
    if len(parts) >= 3 and not parts[0].strip():
        frontmatter = parts[1]
        content = parts[2]
    else:
        frontmatter = ""
        content = text

    # Parse frontmatter for metadata
    # NOTE(review): _parse_simple_yaml is defined outside this view; it is
    # used here as returning a dict-like object - confirm.
    metadata = self._parse_simple_yaml(frontmatter)

    # Parse content as markdown
    parsed = self._parse_markdown(content)

    # Override with frontmatter values
    if metadata.get('name'):
        parsed.name = metadata['name']
    if metadata.get('frequency'):
        parsed.frequency = metadata['frequency']
    if metadata.get('stakeholders'):
        declared = metadata['stakeholders']
        parsed.stakeholders = declared if isinstance(declared, list) else [declared]

    return parsed
325
+
326
def _parse_plain_text(self, text: str) -> ParsedProcess:
    """Parse a plain text process description.

    The first non-blank paragraph (paragraphs are separated by an empty
    line) becomes the description, with surrounding whitespace removed, so
    leading blank lines no longer produce an empty result. The name is the
    description itself, shortened to 50 characters plus "..." when longer,
    or "Untitled Process" when the text has no content. Stakeholders,
    goals and constraints are left empty because plain text has no
    sections to read them from.
    """
    # First paragraph with actual content is usually the description
    paragraphs = (paragraph.strip() for paragraph in text.split('\n\n'))
    description = next((paragraph for paragraph in paragraphs if paragraph), "")

    if not description:
        # Same placeholder _parse_markdown uses when it finds no title.
        name = "Untitled Process"
    elif len(description) > 50:
        name = description[:50].strip() + "..."
    else:
        name = description

    # Look for step patterns
    steps = self._extract_steps_plain(text)

    # Extract other components
    frequency = self._detect_frequency(text)
    pain_points = self._extract_pain_points(text)
    current_tools = self._extract_tools(text)
    data_sources = self._extract_data_sources(text)

    confidence = self._calculate_confidence(steps, [], description)

    return ParsedProcess(
        name=name,
        description=description,
        steps=steps,
        stakeholders=[],
        frequency=frequency,
        pain_points=pain_points,
        current_tools=current_tools,
        data_sources=data_sources,
        goals=[],
        constraints=[],
        raw_text=text,
        confidence_score=confidence
    )
359
+
360
+ def _extract_sections(self, text: str) -> Dict[str, str]:
361
+ """Extract sections from markdown text."""
362
+ sections = {}
363
+ current_section = "intro"
364
+ current_content = []
365
+
366
+ for line in text.split('\n'):
367
+ if re.match(r'^#+\s', line):
368
+ if current_content:
369
+ sections[current_section.lower()] = '\n'.join(current_content)
370
+ current_section = re.sub(r'^#+\s*', '', line).strip()
371
+ current_content = []
372
+ else:
373
+ current_content.append(line)
374
+
375
+ if current_content:
376
+ sections[current_section.lower()] = '\n'.join(current_content)
377
+
378
+ return sections
379
+
380
def _extract_steps_markdown(self, text: str, sections: Dict) -> List[ParsedStep]:
    """Collect steps from a Markdown document.

    Steps are read from the ``steps``, ``process steps`` or ``workflow``
    section (first one present wins); when that yields no text the whole
    document is searched. Numbered items are preferred; bullet items are
    used only when no numbered item is found. Steps are numbered from 1 in
    order of appearance, regardless of the numbers written in the text.
    """
    steps_text = ''
    for heading in ('steps', 'process steps', 'workflow'):
        if heading in sections:
            steps_text = sections[heading]
            break

    if not steps_text:
        # No usable section: look for a list anywhere in the document.
        steps_text = text

    # Numbered item: number, title (optionally bold), then everything up
    # to the next numbered item as the description.
    step_pattern = r'(?:^|\n)\s*(\d+)[.\)]\s*\*?\*?([^:\n]+?)(?:\*?\*?)?\s*(?::|\n|$)(.*?)(?=(?:\n\s*\d+[.\)]|\Z))'
    numbered = re.findall(step_pattern, steps_text, re.DOTALL | re.MULTILINE)
    if numbered:
        return [
            self._create_step(title.strip(), body.strip(), position)
            for position, (_written_number, title, body) in enumerate(numbered, start=1)
        ]

    # Bullet item: its text doubles as name (first 100 chars) and description.
    bullet_pattern = r'(?:^|\n)\s*[-*]\s+(.+?)(?=\n\s*[-*]|\n\n|\Z)'
    bullets = re.findall(bullet_pattern, steps_text, re.DOTALL)
    return [
        self._create_step(item.strip()[:100], item.strip(), position)
        for position, item in enumerate(bullets, start=1)
    ]
408
+
409
def _extract_steps_plain(self, text: str) -> List[ParsedStep]:
    """Derive steps from prose by looking for sequencing words.

    The text is cut into sentences at runs of ``.``, ``!`` or ``?``. Every
    sentence of at least 10 characters that contains a sequencing word
    ("first", "then", "finally", ...) becomes one step; steps are numbered
    in the order the sentences appear.
    """
    # Look for patterns like "First, ...", "Then, ...", "Finally, ..."
    sequence_words = [
        (r'\bfirst\b', 1), (r'\bsecond\b', 2), (r'\bthird\b', 3),
        (r'\bthen\b', 0), (r'\bnext\b', 0), (r'\bafter that\b', 0),
        (r'\bfinally\b', 99), (r'\blast(?:ly)?\b', 99)
    ]
    # Only the patterns are used; the paired numbers do not affect ordering.
    markers = [pattern for pattern, _ in sequence_words]

    found = []
    for fragment in re.split(r'[.!?]+', text):
        sentence = fragment.strip()
        if len(sentence) < 10:
            continue
        has_marker = any(
            re.search(marker, sentence, re.IGNORECASE) for marker in markers
        )
        if has_marker:
            found.append(
                self._create_step(sentence[:100], sentence, len(found) + 1)
            )

    return found
436
+
437
def _create_step(self, name: str, description: str, sequence: int) -> ParsedStep:
    """Build a ParsedStep, deriving its metadata from ``description``.

    Args:
        name: Short label for the step.
        description: Text that is scanned for time estimates, tools, data
            sources, pain points and automation hints.
        sequence: Position of the step within the process.

    Returns:
        A ParsedStep whose ``tools_mentioned`` and ``data_sources`` are
        de-duplicated in first-seen order, and whose ``pain_points`` and
        ``automation_hints`` hold the pattern text of every regex that
        matched the description.
    """
    # Time estimate: accept decimals ("1.5 hours" used to be read as
    # "5 hours") and require the unit to end at a word boundary so that
    # e.g. "5 daylight" is not read as "5 day".
    time_match = re.search(
        r'\d+(?:\.\d+)?\s*(?:minutes?|mins?|hours?|hrs?|days?)\b',
        description,
        re.IGNORECASE,
    )
    time_str = time_match.group(0) if time_match else None

    # A step is treated as manual unless its description says otherwise.
    lowered = description.lower()
    is_manual = not any(kw in lowered for kw in ['automated', 'automatic', 'auto-'])

    tools = []
    for regex in self.tool_regex:
        tools.extend(regex.findall(description))

    data_sources = []
    for regex in self.data_source_regex:
        data_sources.extend(regex.findall(description))

    # For these two the matching pattern itself is recorded, not the matched text.
    pain_points = [
        regex.pattern for regex in self.pain_point_regex if regex.search(description)
    ]
    automation_hints = [
        regex.pattern for regex in self.automation_hint_regex if regex.search(description)
    ]

    return ParsedStep(
        name=name,
        description=description,
        estimated_time=time_str,
        is_manual=is_manual,
        # dict.fromkeys removes duplicates while keeping first-seen order;
        # list(set(...)) produced an order that could change between runs.
        tools_mentioned=list(dict.fromkeys(tools)),
        data_sources=list(dict.fromkeys(data_sources)),
        pain_points=pain_points,
        automation_hints=automation_hints,
        sequence_number=sequence
    )
479
+
480
+ def _extract_list_items(self, text: str) -> List[str]:
481
+ """Extract list items from text."""
482
+ items = []
483
+ # Bullet points
484
+ items.extend(re.findall(r'[-*]\s+(.+?)(?:\n|$)', text))
485
+ # Numbered
486
+ items.extend(re.findall(r'\d+[.\)]\s+(.+?)(?:\n|$)', text))
487
+ # Comma-separated in a sentence
488
+ if not items and ',' in text:
489
+ items = [i.strip() for i in text.split(',')]
490
+ return [i.strip() for i in items if i.strip()]
491
+
492
+ def _detect_frequency(self, text: str) -> str:
493
+ """Detect process frequency from text."""
494
+ text_lower = text.lower()
495
+ for freq, keywords in self.FREQUENCY_KEYWORDS.items():
496
+ for kw in keywords:
497
+ if kw in text_lower:
498
+ return freq
499
+ return "ad-hoc"
500
+
501
+ def _extract_pain_points(self, text: str) -> List[str]:
502
+ """Extract pain points from text."""
503
+ pain_points = []
504
+ for regex in self.pain_point_regex:
505
+ matches = regex.findall(text)
506
+ pain_points.extend(matches)
507
+ return list(set(pain_points))
508
+
509
+ def _extract_tools(self, text: str) -> List[str]:
510
+ """Extract mentioned tools from text."""
511
+ tools = []
512
+ for regex in self.tool_regex:
513
+ tools.extend(regex.findall(text))
514
+ return list(set(tools))
515
+
516
+ def _extract_data_sources(self, text: str) -> List[str]:
517
+ """Extract data sources from text."""
518
+ sources = []
519
+ for regex in self.data_source_regex:
520
+ sources.extend(regex.findall(text))
521
+ return list(set(sources))
522
+
523
+ def _parse_simple_yaml(self, yaml_text: str) -> Dict[str, Any]:
524
+ """Simple YAML parser for frontmatter."""
525
+ result = {}
526
+ current_key = None
527
+ current_list = []
528
+
529
+ for line in yaml_text.split('\n'):
530
+ line = line.strip()
531
+ if not line:
532
+ continue
533
+
534
+ if ':' in line and not line.startswith('-'):
535
+ if current_key and current_list:
536
+ result[current_key] = current_list
537
+ current_list = []
538
+
539
+ key, value = line.split(':', 1)
540
+ key = key.strip()
541
+ value = value.strip()
542
+
543
+ if value:
544
+ result[key] = value
545
+ else:
546
+ current_key = key
547
+ elif line.startswith('-'):
548
+ current_list.append(line[1:].strip())
549
+
550
+ if current_key and current_list:
551
+ result[current_key] = current_list
552
+
553
+ return result
554
+
555
def _calculate_confidence(self, steps: List[ParsedStep], stakeholders: List[str], description: str) -> float:
    """Score how much structure was recovered from the input (50-100).

    Starting from a base of 50, points are added for:
      * each step found: 5 points, capped at 20
      * any stakeholders found: 10 points
      * a description longer than 100 characters: 10 points
      * each step whose description exceeds 50 characters: 2 points, capped at 10
    """
    step_bonus = min(len(steps) * 5, 20)
    stakeholder_bonus = 10 if stakeholders else 0
    description_bonus = 10 if len(description) > 100 else 0

    detailed_count = len([step for step in steps if len(step.description) > 50])
    detail_bonus = min(detailed_count * 2, 10)

    score = 50.0 + step_bonus + stakeholder_bonus + description_bonus + detail_bonus
    return min(score, 100)
575
+
576
+
577
+ # Example usage and AI integration helpers
578
def parse_for_copilot(process_description: str) -> str:
    """
    Convert a process description into JSON for AI consumption.

    The description is parsed with a fresh ``ProcessParser`` and the parsed
    result is serialized through its ``to_json`` method.

    Typical workflow in VS Code with GitHub Copilot:
        1. Write your process description in a markdown file
        2. Call this function with the description
        3. Pass the output to the AI prompt generator

    Args:
        process_description: The process description text to parse.

    Returns:
        The value produced by ``to_json`` on the parsed process.

    Example:
        from process_parser import parse_for_copilot

        description = '''
        # Monthly Sales Report Process

        ## Overview
        Every month we generate sales reports for leadership.

        ## Steps
        1. Export data from Salesforce (30 mins)
        2. Clean data in Excel (1 hour) - very error-prone!
        3. Create pivot tables (45 mins)
        4. Generate charts (30 mins)
        5. Write summary (1 hour)
        6. Email to stakeholders (15 mins)

        ## Stakeholders
        - Sales Director
        - CFO
        - Regional Managers
        '''

        parsed_json = parse_for_copilot(description)
        print(parsed_json)
    """
    return ProcessParser().parse(process_description).to_json()
616
+
617
+
618
if __name__ == "__main__":
    # Demo: parse a sample process description and print a readable summary
    # followed by the JSON payload.
    sample = """
# Weekly Customer Feedback Analysis

## Overview
Every week, we analyze customer feedback from multiple sources to identify
trends and issues. This process takes about 4 hours and is mostly manual.

## Current Pain Points
- Data comes from 5 different sources (email, surveys, social, support tickets, reviews)
- Manually copying data into Excel is tedious and error-prone
- Categorization is inconsistent between team members
- Report formatting takes too long

## Steps
1. **Export survey responses** from SurveyMonkey (20 mins)
2. **Download support tickets** from Zendesk (15 mins)
3. **Scrape social mentions** - manual copy/paste from Hootsuite (30 mins)
4. **Consolidate in Excel** - merge all sources, very tedious (45 mins)
5. **Categorize feedback** - requires judgment, inconsistent (1 hour)
6. **Identify trends** - pivot tables and analysis (45 mins)
7. **Generate charts** in Power BI (30 mins)
8. **Write summary report** - needs expertise (1 hour)
9. **Send to stakeholders** via email (10 mins)

## Stakeholders
- Product Manager
- Customer Success Lead
- VP of Product
- Support Team Lead

## Goals
- Reduce time spent from 4 hours to under 1 hour
- Improve categorization consistency
- Enable real-time or daily analysis instead of weekly

## Constraints
- Budget is limited
- Team has basic Python skills
- Must maintain data privacy (GDPR)
"""

    result = ProcessParser().parse(sample)
    rule = "=" * 60

    # Human-readable summary
    print(rule)
    print("PARSED PROCESS")
    print(rule)
    print(f"Name: {result.name}")
    print(f"Frequency: {result.frequency}")
    print(f"Confidence: {result.confidence_score}%")
    print(f"\nStakeholders: {', '.join(result.stakeholders)}")
    print(f"Tools Found: {', '.join(result.current_tools)}")
    print(f"Pain Points: {result.pain_points}")
    print(f"\nSteps ({len(result.steps)}):")
    for step in result.steps:
        print(f" {step.sequence_number}. {step.name}")
        # Optional details are printed only when the parser found them.
        if step.estimated_time:
            print(f" Time: {step.estimated_time}")
        if step.pain_points:
            print(f" Issues: {step.pain_points}")

    # Machine-readable output
    print("\n" + rule)
    print("JSON OUTPUT FOR AI")
    print(rule)
    print(result.to_json())