h-ai-brain 0.0.5__py3-none-any.whl → 0.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,83 @@
1
+ import re
2
+ from urllib.parse import urlparse
3
+
4
+ from ...domain.web_docs.ecosystem_pattern_repository import EcosystemPatternRepository
5
+
6
+
7
class EcosystemLinkScorerService:
    """Service for scoring potential ecosystem-related links and content.

    Scores are heuristic. Each signal (host name, URL path, link text, page
    content, page headers) is checked against the pattern lists exposed by
    the injected repository, and the number of matching signals is
    normalised to the range 0.0-1.0.
    """

    def __init__(self, pattern_repo: "EcosystemPatternRepository"):
        # The repository must expose ecosystem_domains,
        # ecosystem_path_patterns, ecosystem_text_patterns,
        # ecosystem_content_patterns and ecosystem_header_patterns.
        self.pattern_repo = pattern_repo

    @staticmethod
    def _split_url(url):
        """Return ``(host, path)`` for *url*; empty strings if it cannot be parsed."""
        try:
            parsed = urlparse(url or "")
        except ValueError:
            # Malformed URLs (e.g. unbalanced IPv6 brackets) carry no usable
            # host/path signal; score them on the remaining signals only.
            return "", ""
        # ``hostname`` is lower-cased and excludes userinfo and port, unlike
        # ``netloc``, so only the actual host is matched.
        return parsed.hostname or "", parsed.path

    @staticmethod
    def _host_matches(host: str, domain_pattern: str) -> bool:
        """Return True if *domain_pattern* occurs in *host* at a label boundary.

        A plain substring test would let ``"apps."`` match
        ``snapps.example.com``; requiring the match to start the host or to
        follow a dot restricts it to whole DNS labels.
        """
        pattern = domain_pattern.lower()
        return host.startswith(pattern) or f".{pattern}" in host

    @staticmethod
    def _matches_any(patterns, text: str) -> bool:
        """Return True if any regex in *patterns* is found in *text*."""
        return any(re.search(pattern, text) for pattern in patterns)

    def score(self, full_url: str, link_text: str) -> float:
        """
        Score a link based on how likely it is to be ecosystem-related.

        One point each is awarded for a host match, a path match and a link
        text match; the total is divided by the maximum of three points.

        Args:
            full_url: The absolute URL of the link
            link_text: The visible text of the link (``None`` is treated as empty)

        Returns:
            A value between 0.0 and 1.0
        """
        max_score = 3.0  # One point per signal: host, path, link text

        host, path = self._split_url(full_url)
        text = link_text or ""
        repo = self.pattern_repo

        # Each signal contributes at most one point, however many of its
        # patterns match.
        signals = (
            any(self._host_matches(host, domain) for domain in repo.ecosystem_domains),
            self._matches_any(repo.ecosystem_path_patterns, path),
            self._matches_any(repo.ecosystem_text_patterns, text),
        )

        # Normalize score to 0.0-1.0 range
        return min(sum(signals) / max_score, 1.0)

    def score_page(self, page_url: str, page_title: str, page_content: str) -> float:
        """
        Score an entire page based on how likely it is to contain ecosystem information.

        The URL/title score (see ``score``) is combined with a content score
        built from content-phrase matches and header-pattern matches.

        Args:
            page_url: The URL of the page
            page_title: The title of the page (``None`` is treated as empty)
            page_content: The full text content of the page (``None`` is
                treated as empty)

        Returns:
            A value between 0.0 and 1.0
        """
        # Start with the URL and title scoring
        url_score = self.score(page_url, page_title)

        content = page_content or ""
        max_content_score = 2.0
        repo = self.pattern_repo

        # Count how many distinct content patterns occur in the page
        content_matches = sum(
            1
            for pattern in repo.ecosystem_content_patterns
            if re.search(pattern, content)
        )

        # Three or more matching phrases earn the full point, fewer earn half
        if content_matches >= 3:
            content_score = 1.0
        elif content_matches > 0:
            content_score = 0.5
        else:
            content_score = 0.0

        # Any header pattern present in the content earns one more point
        if self._matches_any(repo.ecosystem_header_patterns, content):
            content_score += 1.0

        # Combined score: content weighs twice as much as URL/title
        return min((url_score + (content_score / max_content_score) * 2) / 3, 1.0)
@@ -0,0 +1,182 @@
1
class EcosystemPatternRepository:
    """Repository of patterns that identify ecosystem relationships, builders, and collaboration.

    The pattern tables are kept as immutable class-level tuples; every
    instance receives its own mutable list copy of each table.
    """

    # Domain fragments commonly associated with ecosystem and project showcases
    _ECOSYSTEM_DOMAINS = (
        "showcase.",
        "ecosystem.",
        "community.",
        "gallery.",
        "partners.",
        "developers.",
        "marketplace.",
        "expo.",
        "apps.",
        "extensions.",
        "plugins.",
    )

    # URL path patterns indicating ecosystem/builder content
    _ECOSYSTEM_PATH_PATTERNS = (
        r"/ecosystem/",
        r"/showcase/",
        r"/community/",
        r"/built-with/",
        r"/case-studies/",
        r"/customers/",
        r"/partners/",
        r"/users/",
        r"/success-stories/",
        r"/integrations/",
        r"/extensions/",
        r"/marketplace/",
        r"/plugins/",
        r"/addons/",
        r"/gallery/",
        r"/examples/",
        r"/projects/",
        r"/contributors/",
        r"/whos-using/",
    )

    # Link text patterns suggesting ecosystem content
    _ECOSYSTEM_TEXT_PATTERNS = (
        r"(?i)ecosystem",
        r"(?i)showcase",
        r"(?i)built with",
        r"(?i)powered by",
        r"(?i)case stud(y|ies)",
        r"(?i)success stor(y|ies)",
        r"(?i)who('s| is) using",
        r"(?i)our users",
        r"(?i)our customers",
        r"(?i)integrations?",
        r"(?i)extensions?",
        r"(?i)plugins?",
        r"(?i)addons?",
        r"(?i)community projects",
        r"(?i)community contributions",
        r"(?i)user contributions",
        r"(?i)featured projects",
        r"(?i)gallery",
    )

    # Header/title patterns suggesting ecosystem sections
    _ECOSYSTEM_HEADER_PATTERNS = (
        r"(?i)ecosystem",
        r"(?i)who('s| is) using",
        r"(?i)built (on|with)",
        r"(?i)powered by",
        r"(?i)trusted by",
        r"(?i)customer(s| success)",
        r"(?i)case stud(y|ies)",
        r"(?i)success stor(y|ies)",
        r"(?i)showcase",
        r"(?i)featured (users|customers|projects)",
        r"(?i)community (projects|showcase)",
        r"(?i)partner(s| program)",
        r"(?i)(our|notable) users",
        r"(?i)companies using",
        r"(?i)in production",
        r"(?i)contributor(s| showcase)",
        r"(?i)extension (gallery|showcase)",
        r"(?i)plugin (directory|marketplace)",
        r"(?i)apps? (built|marketplace|gallery)",
    )

    # Content phrases that suggest ecosystem descriptions
    _ECOSYSTEM_CONTENT_PATTERNS = (
        r"(?i)built (on|with) (our|this)",
        r"(?i)(companies|organizations|projects) (using|powered by)",
        r"(?i)(is|are) using (our|this)",
        r"(?i)powered by (our|this)",
        r"(?i)extend(s|ing)? (the|our) (platform|ecosystem)",
        r"(?i)integrated with",
        r"(?i)build(s|ing)? (on top of|with)",
        r"(?i)leverage(s|ing)? (our|this)",
        r"(?i)extend(s|ing)? (the|our) (functionality|capabilities)",
        r"(?i)based on (our|this)",
        r"(?i)implemented (with|using)",
        r"(?i)developed (with|using)",
        r"(?i)(join|be part of) (our|the) ecosystem",
    )

    # Builder and contribution-specific patterns
    _BUILDER_PATTERNS = (
        r"(?i)how to (build|contribute)",
        r"(?i)build(ing)? (with|on)",
        r"(?i)develop(ing)? (with|on)",
        r"(?i)contribute to",
        r"(?i)contributor guide",
        r"(?i)developer program",
        r"(?i)join (our|the) (ecosystem|community)",
        r"(?i)become a (contributor|partner)",
        r"(?i)extend (our|the) (platform|ecosystem)",
        r"(?i)create (your own|an?) (plugin|extension|integration)",
        r"(?i)developer (resources|portal)",
        r"(?i)sdk",
        r"(?i)api (access|integration)",
        r"(?i)partner (program|portal)",
    )

    # Visual cues that often indicate ecosystem showcases
    _VISUAL_INDICATORS = (
        r"logo grid",
        r"logo carousel",
        r"client logos",
        r"partner logos",
        r"customer logos",
        r"company logos",
        r"card gallery",
        r"project cards",
        r"showcase gallery",
        r"case study cards",
        r"testimonials",
        r"user testimonials",
    )

    # Collaboration-specific patterns
    _COLLABORATION_PATTERNS = (
        r"(?i)how to collaborate",
        r"(?i)collaboration (guide|opportunities)",
        r"(?i)working together",
        r"(?i)partner(ship|ing) (opportunities|program)",
        r"(?i)join (our|the) (community|ecosystem)",
        r"(?i)community (contribution|participation)",
        r"(?i)open (source|collaboration)",
        r"(?i)contribute (code|documentation|resources)",
        r"(?i)become a (partner|contributor|maintainer)",
        r"(?i)collaboration (framework|model)",
        r"(?i)(business|technical) partnership",
        r"(?i)developer relations",
        r"(?i)community (engagement|involvement)",
    )

    # Key meta tags that might indicate ecosystem content
    _META_TAG_PATTERNS = (
        r"(?i)ecosystem",
        r"(?i)showcase",
        r"(?i)community",
        r"(?i)partner program",
        r"(?i)integration",
        r"(?i)extension",
        r"(?i)plugin",
        r"(?i)marketplace",
        r"(?i)collaboration",
        r"(?i)use cases",
        r"(?i)case studies",
        r"(?i)success stories",
    )

    # Schema.org types that often indicate ecosystem relationships
    _SCHEMA_TYPES = (
        "Product",
        "SoftwareApplication",
        "Organization",
        "BusinessPartner",
        "ProgramMembership",
        "CreativeWork",
        "SoftwareSourceCode",
        "WebApplication",
    )

    def __init__(self):
        # Copy each table into a fresh list so that mutating one instance's
        # patterns never affects another instance or the class defaults.
        cls = type(self)
        self.ecosystem_domains = list(cls._ECOSYSTEM_DOMAINS)
        self.ecosystem_path_patterns = list(cls._ECOSYSTEM_PATH_PATTERNS)
        self.ecosystem_text_patterns = list(cls._ECOSYSTEM_TEXT_PATTERNS)
        self.ecosystem_header_patterns = list(cls._ECOSYSTEM_HEADER_PATTERNS)
        self.ecosystem_content_patterns = list(cls._ECOSYSTEM_CONTENT_PATTERNS)
        self.builder_patterns = list(cls._BUILDER_PATTERNS)
        self.visual_indicators = list(cls._VISUAL_INDICATORS)
        self.collaboration_patterns = list(cls._COLLABORATION_PATTERNS)
        self.meta_tag_patterns = list(cls._META_TAG_PATTERNS)
        self.schema_types = list(cls._SCHEMA_TYPES)
@@ -19,7 +19,13 @@ class PlayWrightWebContentFetcher(WebFetcherRepository):
19
19
 
20
20
  async def fetch(self, url: str) -> Optional[str]:
21
21
  async with async_playwright() as p:
22
- browser = await p.chromium.launch(headless=True)
22
+ browser = await p.chromium.launch(headless=True, args=[
23
+ '--disable-dev-shm-usage', # Required for Docker
24
+ '--no-sandbox', # Required for Docker non-root user
25
+ '--disable-setuid-sandbox', # Required for Docker security
26
+ '--disable-gpu', # Reduces resource usage
27
+ ])
28
+
23
29
  logger.debug(
24
30
  f"Launching headless browser with user agent: {self.headers.get('User-Agent')}"
25
31
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: h_ai_brain
3
- Version: 0.0.5
3
+ Version: 0.0.6
4
4
  Summary: AI Research agent API
5
5
  Author-email: shoebill <shoebill.hai@gmail.com>
6
6
  Classifier: Programming Language :: Python :: 3
@@ -14,6 +14,8 @@ h_ai/domain/reasoning/tool_message.py,sha256=jpbfbJXj6oqZyB3lDxGOUyFB4faHtXAaEOV
14
14
  h_ai/domain/web_docs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
15
  h_ai/domain/web_docs/doc_link_scorer_service.py,sha256=EmLSOaX7BCUQcKHZquaUt-Ps_DssZrRqpch5MgbUhAc,1444
16
16
  h_ai/domain/web_docs/documentation_pattern_repository.py,sha256=VhNzP3PUqgg9MaWhBVefj13XNxRBh6ZPUt-KH70ww2w,1302
17
+ h_ai/domain/web_docs/ecosystem_link_scorer_service.py,sha256=Slin3ZAdJ3o3CxTvJtfD-vd4R4f-MINd3PY2G3bCCQg,2899
18
+ h_ai/domain/web_docs/ecosystem_pattern_repository.py,sha256=uHBhEvz3HmhXRvFJ6BzJddZmngPSAQw-q39TgRLJiPg,6609
17
19
  h_ai/domain/web_docs/gitbook_web_fetcher_service.py,sha256=Ye-TcuwgW1fhIY8x6v9_-pmPN9pVFWzlOpwRt-4teaA,6490
18
20
  h_ai/domain/web_docs/web_docs_link_detector.py,sha256=NyMKFNs-41bqrxx6u-9GqIufy7pkDF_-_f1h8HECBK8,1192
19
21
  h_ai/domain/web_docs/web_link.py,sha256=J4KC3MmjkvWlAPDdEdjcqAZCvuSnJMahudCohiBk3wk,307
@@ -39,10 +41,10 @@ h_ai/infrastructure/llm/ollama/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCe
39
41
  h_ai/infrastructure/llm/ollama/models/ollama_chat_message.py,sha256=ZIz4PQ3869vI3xAYYufPrxXpacajRDtOI8RDl5Dm9RQ,305
40
42
  h_ai/infrastructure/llm/ollama/models/ollama_chat_session.py,sha256=GZ_ddpbWa8iy6NZq50vokUFVZBiX0WNa81z9-r9RzTY,392
41
43
  h_ai/infrastructure/playwright/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
- h_ai/infrastructure/playwright/playwright_web_content_fetcher.py,sha256=fcf8ZxIR2bLU1rV378ASQUaI26PvaX7sYqQb1SDlkt4,2401
43
- h_ai_brain-0.0.5.dist-info/licenses/LICENSE,sha256=SbvpEU5JIU3yzMMkyzrI0dGqHDoJR_lMKGdl6GZHsy4,11558
44
- h_ai_brain-0.0.5.dist-info/licenses/NOTICE.txt,sha256=vxeIKUiGqAePLvDW4AVm3Xh-3BcsvMtCMn1tbsr9zsE,668
45
- h_ai_brain-0.0.5.dist-info/METADATA,sha256=d6J1f3gTeJaS90RwHDfPLuwPila1TzYFxbRInOqHqHM,735
46
- h_ai_brain-0.0.5.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
47
- h_ai_brain-0.0.5.dist-info/top_level.txt,sha256=3MChDBWvDJV4cEHuZhzeODxQ4ewtw-arOuyaDOc6sIo,5
48
- h_ai_brain-0.0.5.dist-info/RECORD,,
44
+ h_ai/infrastructure/playwright/playwright_web_content_fetcher.py,sha256=FVwcK6hv_6aE4fYlJapLHyxNHsztQkKaulklHabyrEc,2684
45
+ h_ai_brain-0.0.6.dist-info/licenses/LICENSE,sha256=SbvpEU5JIU3yzMMkyzrI0dGqHDoJR_lMKGdl6GZHsy4,11558
46
+ h_ai_brain-0.0.6.dist-info/licenses/NOTICE.txt,sha256=vxeIKUiGqAePLvDW4AVm3Xh-3BcsvMtCMn1tbsr9zsE,668
47
+ h_ai_brain-0.0.6.dist-info/METADATA,sha256=taZvGg8xXlBSu13t5DKFWXBPybB8k6Zi0Izjk3qpLv8,735
48
+ h_ai_brain-0.0.6.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
49
+ h_ai_brain-0.0.6.dist-info/top_level.txt,sha256=3MChDBWvDJV4cEHuZhzeODxQ4ewtw-arOuyaDOc6sIo,5
50
+ h_ai_brain-0.0.6.dist-info/RECORD,,