hinglish-nlp 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,178 @@
1
+ Metadata-Version: 2.4
2
+ Name: hinglish-nlp
3
+ Version: 0.2.0
4
+ Summary: Hinglish (Roman Hindi + English) NLP toolkit - Sentiment, Emotion, Sarcasm & more
5
+ Author-email: Lalit <official.lalitpal08@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/Lalit2206/hinglish-nlp
8
+ Project-URL: Repository, https://github.com/Lalit2206/hinglish-nlp
9
+ Keywords: hinglish,nlp,sentiment,hindi,roman-hindi,text-analysis
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Operating System :: OS Independent
13
+ Classifier: Topic :: Text Processing :: Linguistic
14
+ Classifier: Natural Language :: Hindi
15
+ Requires-Python: >=3.9
16
+ Description-Content-Type: text/markdown
17
+ Requires-Dist: pydantic>=2.0
18
+
19
+ # hinglish-nlp 🇮🇳
20
+
21
+ A powerful NLP toolkit for **Hinglish** (Roman Hindi + English) text analysis.
22
+
23
+ [![PyPI version](https://badge.fury.io/py/hinglish-nlp.svg)](https://badge.fury.io/py/hinglish-nlp)
24
+ [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
25
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
26
+
27
+ ---
28
+
29
+ ## Installation
30
+
31
+ ```bash
32
+ pip install hinglish-nlp
33
+ ```
34
+
35
+ ---
36
+
37
+ ## Features
38
+
39
+ | Feature | Description |
40
+ |--------|-------------|
41
+ | ✅ Sentiment Analysis | Positive / Negative / Neutral / Mixed |
42
+ | ✅ Emotion Detection | Joy, Anger, Sadness, Fear, Surprise, Disgust |
43
+ | ✅ Sarcasm Detection | Pattern + contradiction based |
44
+ | ✅ Language Mix Detection | Hinglish vs English ratio |
45
+ | ✅ Transliteration | Roman Hindi → Devanagari |
46
+ | ✅ Batch Processing | Multiple texts at once |
47
+ | ✅ Key Phrase Extraction | Important phrases from text |
48
+ | ✅ Intensity Score | 0.0 to 1.0 scale |
49
+ | ✅ Confidence Score | How sure the model is |
50
+
51
+ ---
52
+
53
+ ## Usage
54
+
55
+ ### Basic Sentiment Analysis
56
+ ```python
57
+ from hinglish import analyze
58
+
59
+ result = analyze("yaar bahut mast movie thi!")
60
+ print(result["mood"]) # positive
61
+ print(result["emoji"]) # 😊
62
+ print(result["intensity"]) # 0.4
63
+ print(result["confidence"]) # 0.75
64
+ ```
65
+
66
+ ### Emotion Detection
67
+ ```python
68
+ from hinglish import detect_emotion
69
+
70
+ emotions = detect_emotion("mujhe bahut gussa aa raha hai!")
71
+ print(emotions) # {'anger': 0.35}
72
+
73
+ emotions = detect_emotion("aaj bahut khushi hui yaar!")
74
+ print(emotions) # {'joy': 0.35}
75
+ ```
76
+
77
+ ### Sarcasm Detection
78
+ ```python
79
+ from hinglish import is_sarcastic
80
+
81
+ result = is_sarcastic("haan bilkul, bahut accha hai na!!")
82
+ print(result)
83
+ # {'is_sarcastic': True, 'confidence': 0.9}
84
+ ```
85
+
86
+ ### Language Mix Detection
87
+ ```python
88
+ from hinglish import detect_language
89
+
90
+ mix = detect_language("yaar ye movie bahut boring thi")
91
+ print(mix)
92
+ # {'hinglish': 0.5, 'english': 0.33, 'unknown': 0.17}
93
+ ```
94
+
95
+ ### Transliteration (Roman → Devanagari)
96
+ ```python
97
+ from hinglish import transliterate
98
+
99
+ text = transliterate("mera naam lalit hai")
100
+ print(text) # मेरा नाम lalit है ("lalit" is not in the built-in word map, so it stays in Roman script)
101
+ ```
102
+
103
+ ### Batch Processing
104
+ ```python
105
+ from hinglish import analyze_batch
106
+
107
+ texts = [
108
+ "yaar mast movie thi!",
109
+ "bilkul bakwaas tha yaar",
110
+ "theek thak tha, kuch khaas nahi"
111
+ ]
112
+
113
+ results = analyze_batch(texts)
114
+ for r in results:
115
+ print(r["mood"], r["emoji"])
116
+ # positive 😊
117
+ # negative 😠
118
+ # neutral 😐
119
+ ```
120
+
121
+ ### Full Analysis
122
+ ```python
123
+ from hinglish import analyze
124
+
125
+ result = analyze("Phone ki battery toh bekar hai but camera mast hai")
126
+ print(result)
127
+ # {
128
+ # 'mood': 'mixed',
129
+ # 'intensity': 0.5,
130
+ # 'confidence': 0.75,
131
+ # 'emoji': '🤨',
132
+ # 'sentiment': 'mixed',
133
+ # 'key_phrases': ['Phone ki battery toh bekar hai but camera mast hai'],
134
+ # 'sarcasm': False,
135
+ # 'sarcasm_confidence': 0.2,
136
+ # 'language_mix': {'hinglish': 0.5, 'english': 0.4, 'unknown': 0.1},
137
+ # 'category': 'mixed',
138
+ # 'summary': 'A detailed Hinglish message expressing mixed sentiment...',
139
+ # 'emotions': {'joy': 0.35},
140
+ # 'word_count': 10,
141
+ # 'positive_words_found': ['mast'],
142
+ # 'negative_words_found': ['bekar'],
143
+ # 'transliteration': 'फ़ोन ki battery तो bekar है but camera मस्त है'
144
+ # }
145
+ ```
146
+
147
+ ---
148
+
149
+ ## Output Fields
150
+
151
+ | Field | Type | Description |
152
+ |-------|------|-------------|
153
+ | `mood` | str | positive / negative / neutral / mixed |
154
+ | `intensity` | float | 0.0 – 1.0 |
155
+ | `confidence` | float | 0.0 – 1.0 |
156
+ | `emoji` | str | Visual mood indicator |
157
+ | `sentiment` | str | Same as mood |
158
+ | `key_phrases` | list | Important phrases |
159
+ | `sarcasm` | bool | Is text sarcastic? |
160
+ | `sarcasm_confidence` | float | Sarcasm confidence score |
161
+ | `language_mix` | dict | hinglish / english / unknown ratio |
162
+ | `category` | str | praise / complaint / casual / mixed |
163
+ | `summary` | str | Short summary of the text |
164
+ | `emotions` | dict | Detected emotions with scores |
165
+ | `word_count` | int | Total word count |
166
+ | `positive_words_found` | list | Positive words detected |
167
+ | `negative_words_found` | list | Negative words detected |
168
+ | `transliteration` | str | Roman → Devanagari |
169
+
170
+ ---
171
+
172
+ ## Author
173
+
174
+ **Lalit** — [lalitpal2206](https://pypi.org/user/lalitpal2206/)
175
+
176
+ ## License
177
+
178
+ MIT License
@@ -0,0 +1,160 @@
1
+ # hinglish-nlp 🇮🇳
2
+
3
+ A powerful NLP toolkit for **Hinglish** (Roman Hindi + English) text analysis.
4
+
5
+ [![PyPI version](https://badge.fury.io/py/hinglish-nlp.svg)](https://badge.fury.io/py/hinglish-nlp)
6
+ [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
7
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
8
+
9
+ ---
10
+
11
+ ## Installation
12
+
13
+ ```bash
14
+ pip install hinglish-nlp
15
+ ```
16
+
17
+ ---
18
+
19
+ ## Features
20
+
21
+ | Feature | Description |
22
+ |--------|-------------|
23
+ | ✅ Sentiment Analysis | Positive / Negative / Neutral / Mixed |
24
+ | ✅ Emotion Detection | Joy, Anger, Sadness, Fear, Surprise, Disgust |
25
+ | ✅ Sarcasm Detection | Pattern + contradiction based |
26
+ | ✅ Language Mix Detection | Hinglish vs English ratio |
27
+ | ✅ Transliteration | Roman Hindi → Devanagari |
28
+ | ✅ Batch Processing | Multiple texts at once |
29
+ | ✅ Key Phrase Extraction | Important phrases from text |
30
+ | ✅ Intensity Score | 0.0 to 1.0 scale |
31
+ | ✅ Confidence Score | How sure the model is |
32
+
33
+ ---
34
+
35
+ ## Usage
36
+
37
+ ### Basic Sentiment Analysis
38
+ ```python
39
+ from hinglish import analyze
40
+
41
+ result = analyze("yaar bahut mast movie thi!")
42
+ print(result["mood"]) # positive
43
+ print(result["emoji"]) # 😊
44
+ print(result["intensity"]) # 0.4
45
+ print(result["confidence"]) # 0.75
46
+ ```
47
+
48
+ ### Emotion Detection
49
+ ```python
50
+ from hinglish import detect_emotion
51
+
52
+ emotions = detect_emotion("mujhe bahut gussa aa raha hai!")
53
+ print(emotions) # {'anger': 0.35}
54
+
55
+ emotions = detect_emotion("aaj bahut khushi hui yaar!")
56
+ print(emotions) # {'joy': 0.35}
57
+ ```
58
+
59
+ ### Sarcasm Detection
60
+ ```python
61
+ from hinglish import is_sarcastic
62
+
63
+ result = is_sarcastic("haan bilkul, bahut accha hai na!!")
64
+ print(result)
65
+ # {'is_sarcastic': True, 'confidence': 0.9}
66
+ ```
67
+
68
+ ### Language Mix Detection
69
+ ```python
70
+ from hinglish import detect_language
71
+
72
+ mix = detect_language("yaar ye movie bahut boring thi")
73
+ print(mix)
74
+ # {'hinglish': 0.5, 'english': 0.33, 'unknown': 0.17}
75
+ ```
76
+
77
+ ### Transliteration (Roman → Devanagari)
78
+ ```python
79
+ from hinglish import transliterate
80
+
81
+ text = transliterate("mera naam lalit hai")
82
+ print(text) # मेरा नाम lalit है ("lalit" is not in the built-in word map, so it stays in Roman script)
83
+ ```
84
+
85
+ ### Batch Processing
86
+ ```python
87
+ from hinglish import analyze_batch
88
+
89
+ texts = [
90
+ "yaar mast movie thi!",
91
+ "bilkul bakwaas tha yaar",
92
+ "theek thak tha, kuch khaas nahi"
93
+ ]
94
+
95
+ results = analyze_batch(texts)
96
+ for r in results:
97
+ print(r["mood"], r["emoji"])
98
+ # positive 😊
99
+ # negative 😠
100
+ # neutral 😐
101
+ ```
102
+
103
+ ### Full Analysis
104
+ ```python
105
+ from hinglish import analyze
106
+
107
+ result = analyze("Phone ki battery toh bekar hai but camera mast hai")
108
+ print(result)
109
+ # {
110
+ # 'mood': 'mixed',
111
+ # 'intensity': 0.5,
112
+ # 'confidence': 0.75,
113
+ # 'emoji': '🤨',
114
+ # 'sentiment': 'mixed',
115
+ # 'key_phrases': ['Phone ki battery toh bekar hai but camera mast hai'],
116
+ # 'sarcasm': False,
117
+ # 'sarcasm_confidence': 0.2,
118
+ # 'language_mix': {'hinglish': 0.5, 'english': 0.4, 'unknown': 0.1},
119
+ # 'category': 'mixed',
120
+ # 'summary': 'A detailed Hinglish message expressing mixed sentiment...',
121
+ # 'emotions': {'joy': 0.35},
122
+ # 'word_count': 10,
123
+ # 'positive_words_found': ['mast'],
124
+ # 'negative_words_found': ['bekar'],
125
+ # 'transliteration': 'फ़ोन ki battery तो bekar है but camera मस्त है'
126
+ # }
127
+ ```
128
+
129
+ ---
130
+
131
+ ## Output Fields
132
+
133
+ | Field | Type | Description |
134
+ |-------|------|-------------|
135
+ | `mood` | str | positive / negative / neutral / mixed |
136
+ | `intensity` | float | 0.0 – 1.0 |
137
+ | `confidence` | float | 0.0 – 1.0 |
138
+ | `emoji` | str | Visual mood indicator |
139
+ | `sentiment` | str | Same as mood |
140
+ | `key_phrases` | list | Important phrases |
141
+ | `sarcasm` | bool | Is text sarcastic? |
142
+ | `sarcasm_confidence` | float | Sarcasm confidence score |
143
+ | `language_mix` | dict | hinglish / english / unknown ratio |
144
+ | `category` | str | praise / complaint / casual / mixed |
145
+ | `summary` | str | Short summary of the text |
146
+ | `emotions` | dict | Detected emotions with scores |
147
+ | `word_count` | int | Total word count |
148
+ | `positive_words_found` | list | Positive words detected |
149
+ | `negative_words_found` | list | Negative words detected |
150
+ | `transliteration` | str | Roman → Devanagari |
151
+
152
+ ---
153
+
154
+ ## Author
155
+
156
+ **Lalit** — [lalitpal2206](https://pypi.org/user/lalitpal2206/)
157
+
158
+ ## License
159
+
160
+ MIT License
@@ -0,0 +1,38 @@
1
+ """
2
+ hinglish-nlp
3
+ ============
4
+ A powerful NLP toolkit for Hinglish (Roman Hindi + English) text analysis.
5
+
6
+ Quick start
7
+ -----------
8
+ >>> from hinglish import analyze
9
+ >>> result = analyze("yaar bahut mast aur zabardast movie thi!")
10
+ >>> print(result["mood"]) # positive
11
+ >>> print(result["emotions"]) # {'joy': 0.35}
12
+
13
+ >>> from hinglish import analyze_batch, transliterate, detect_emotion
14
+ """
15
+
16
+ from .analyzer import (
17
+ analyze,
18
+ analyze_batch,
19
+ transliterate,
20
+ detect_language,
21
+ detect_emotion,
22
+ is_sarcastic,
23
+ HinglishAnalysis,
24
+ HinglishAnalyzer,
25
+ )
26
+
27
+ __version__ = "0.2.0"
28
+ __author__ = "Lalit"
29
+ __all__ = [
30
+ "analyze",
31
+ "analyze_batch",
32
+ "transliterate",
33
+ "detect_language",
34
+ "detect_emotion",
35
+ "is_sarcastic",
36
+ "HinglishAnalysis",
37
+ "HinglishAnalyzer",
38
+ ]
@@ -0,0 +1,368 @@
1
+ from dataclasses import dataclass, field
2
+ from typing import Dict, List, Any, Optional
3
+ import re
4
+
5
+
6
+ # LEXICON
7
# Words counted as positive sentiment signals (looked up by exact,
# lower-cased match).
# NOTE(review): the multi-word entries ("too good", "ekdum sahi", ...) can only
# match if a caller looks up whole phrases; the analyzer in this file tokenizes
# with r'\w+' and tests single tokens, so they appear to be inert - confirm.
POSITIVE_WORDS = {
    # Everyday praise and slang
    "mast", "badiya", "zabardast", "awesome", "superb", "love", "best",
    "perfect", "great", "achha", "accha", "nice", "kamaal", "shandaar",
    "dhamaka", "bindaas", "jhakaas", "wah", "waah", "solid", "killer",
    "dope", "lit", "fire", "too good", "ekdum sahi", "full mast",
    "khush", "khushi", "maza", "party", "celebrate", "lajawaab",
    "shaandaar", "gazab", "jalwa", "badhiya", "outstanding", "excellent",
    # English superlatives
    "wonderful", "fantastic", "brilliant", "amazing", "fabulous",
    "incredible", "marvelous", "splendid", "terrific", "spectacular",
    "magnificent", "glorious", "lovely", "beautiful", "gorgeous",
    "stunning", "charming", "delightful", "enjoyable", "fun", "exciting",
    "thrilling", "refreshing", "satisfying", "pleasing", "impressive",
    "remarkable", "extraordinary", "phenomenal", "exceptional", "top",
    # Approval phrases and chat-style abbreviations
    "sahi", "bilkul sahi", "dum", "dum hai", "kya baat", "ekdum",
    "maja", "maja aaya", "mst", "bdhiya", "zbrdsst", "kmaal",
    # Positive feelings and states
    "happy", "glad", "joyful", "cheerful", "content", "pleased",
    "grateful", "thankful", "blessed", "fortunate", "lucky",
    "proud", "confident", "hopeful", "optimistic", "positive",
    "energetic", "enthusiastic", "passionate", "motivated", "inspired",
    "relaxed", "peaceful", "calm", "comfortable", "satisfied",
}
29
+
30
# Words counted as negative sentiment signals (looked up by exact,
# lower-cased match).
# NOTE(review): "time waste" is a two-word entry and "forma" is not an obvious
# Hinglish or English word - both are kept as-is; confirm they are intended.
NEGATIVE_WORDS = {
    # Everyday complaints and slang
    "bakwaas", "bekar", "kharaab", "worst", "faltu", "garbage",
    "disappoint", "boring", "time waste", "bekaar", "ganda", "bura",
    "kharab", "wahiyat", "forma", "useless", "pathetic",
    # Strong English negatives
    "terrible", "horrible", "awful", "dreadful", "atrocious", "abysmal",
    "disgusting", "revolting", "repulsive", "nasty", "vile", "foul",
    "lousy", "poor", "inferior", "substandard", "inadequate", "deficient",
    # Defects, failures and problems
    "flawed", "faulty", "broken", "damaged", "ruined", "destroyed",
    "failed", "failure", "disaster", "catastrophe", "tragedy", "mess",
    "problem", "issue", "trouble", "difficulty", "challenge", "obstacle",
    # Negative reactions
    "frustrating", "annoying", "irritating", "aggravating", "infuriating",
    "disappointing", "unsatisfying", "dissatisfying", "displeasing",
    # Negative feelings and states
    "sad", "unhappy", "miserable", "depressed", "gloomy", "melancholy",
    "upset", "distressed", "troubled", "worried", "anxious", "stressed",
    "angry", "furious", "enraged", "outraged", "livid", "irate",
    "disgusted", "repelled", "appalled", "horrified", "shocked",
    "bored", "dull", "tedious", "monotonous", "repetitive", "bland",
    # Negations and dismissals
    "nahi", "mat", "bandh", "band", "chup", "shut",
}
50
+
51
# Degree words; each occurrence in a text raises the computed intensity score.
INTENSIFIERS = {
    # Hinglish (including common chat spellings of "bahut")
    "bahut", "bohot", "bht", "boht", "bhot",
    "ekdum", "bilkul", "sabse", "itna", "utna", "kitna", "zyada",
    # English
    "very", "super",
    "extremely", "incredibly", "absolutely", "totally", "completely",
    "utterly", "highly", "deeply", "strongly", "severely", "greatly",
    "tremendously", "enormously", "immensely", "vastly", "profoundly",
}
58
+
59
# Emotion name -> set of cue words. The key order is significant: it is the
# order in which detected emotions are reported and therefore also decides
# ties when the strongest emotion is picked.
# NOTE(review): multi-word cues ("maja aaya", "no way", "darr gaya", ...) only
# match if a caller passes whole phrases as tokens - confirm against callers.
EMOTION_LEXICON = {
    "anger": {
        # Hinglish
        "gussa", "ghussa", "pagal", "bakwas", "beizzati", "insult",
        "jalega", "jala", "jalao", "maar", "marunga", "toot",
        # English
        "angry", "furious", "rage", "mad", "irritated", "annoyed",
        "frustrated", "outraged", "livid", "enraged", "irate",
        "aggressive", "hostile", "violent", "hateful", "resentful",
    },
    "joy": {
        # Hinglish
        "khushi", "maza", "party", "celebrate", "mast", "khush",
        "maja", "maja aaya", "bindaas", "dhamaal", "moj", "masti",
        # English
        "happy", "joyful", "cheerful", "delighted", "elated", "ecstatic",
        "thrilled", "excited", "glad", "pleased", "content", "satisfied",
        "blissful", "euphoric", "overjoyed", "jubilant", "radiant",
    },
    "sadness": {
        # Hinglish
        "dukh", "rona", "bura", "miss", "akela", "dard", "toot",
        "roya", "rote", "aansu", "tadap", "bichar",
        # English
        "sad", "unhappy", "miserable", "depressed", "gloomy", "melancholy",
        "sorrowful", "heartbroken", "devastated", "grief", "mourning",
        "lonely", "isolated", "abandoned", "rejected", "hurt", "pain",
        "cry", "tears", "weep", "sob", "lament", "grieve",
    },
    "surprise": {
        # Hinglish
        "kya", "seriously", "matlab", "no way", "sach", "sachchi",
        "arre", "arrey", "oye", "yaar sach", "are bhai",
        # English
        "surprised", "shocked", "astonished", "amazed", "stunned",
        "bewildered", "dumbfounded", "flabbergasted", "astounded",
        "unexpected", "unbelievable", "incredible", "wow", "omg",
    },
    "fear": {
        # Hinglish
        "dar", "darr", "tension", "problem", "mushkil", "dara",
        "darr gaya", "darta hun", "daro mat",
        # English
        "afraid", "scared", "frightened", "terrified", "horrified",
        "anxious", "nervous", "worried", "stressed", "panicked",
        "dread", "terror", "phobia", "paranoid", "threatened",
    },
    "disgust": {
        # Hinglish
        "yuck", "chhi", "ganda", "ulti", "bura laga", "nafrat",
        # English
        "disgusting", "revolting", "repulsive", "nasty", "vile",
        "gross", "horrible", "awful", "terrible", "dreadful",
        "loathsome", "abhorrent", "detestable", "despicable",
        "hate", "hatred", "abhor", "detest", "despise", "loathe",
    },
}
104
+
105
# Regular expressions whose presence hints at sarcasm. They are searched in
# lower-cased text, and every pattern that matches adds to the sarcasm score.
SARCASM_PATTERNS = [
    # "bahut accha/accho hai na" - exaggerated agreement seeking confirmation
    r"bahut\s+acch[ao]\s+hai\s*na",
    # "haan bilkul" - "yes, absolutely"
    r"haan\s+bilkul",
    # "wah kya baat" - mock applause
    r"wah\s+kya\s+baat",
    # two or more consecutive exclamation marks
    r"!{2,}",
    # "sure yaar", optionally preceded by "oh"
    r"(?:oh\s+)?sure\s+yaar",
    # "bilkul sahi hai/tha/thi" - "absolutely right"
    r"bilkul\s+sahi\s+(?:hai|tha|thi)",
    # "kitna accha/mast/badiya hai/tha/thi", optionally ending in "na"/"yaar"
    r"kitna\s+(?:accha|mast|badiya)\s+(?:hai|tha|thi)\s*(?:na|yaar)?",
    # "great yaar"
    r"great\s+yaar",
    # "very nice yaar/bhai"
    r"very\s+nice\s+(?:yaar|bhai)",
]
116
+
117
# Tokens treated as Hinglish by the language-mix heuristic. The set also holds
# English loanwords common in Hinglish chat ("movie", "phone", "wifi", ...), so
# those count as Hinglish rather than English.
HINGLISH_WORDS = {
    # Particles, pronouns and question words
    "yaar", "bhai", "mein", "hai", "toh", "kya", "nahi", "haan",
    "abhi", "kal", "aaj", "phir", "kab", "kahan", "kyun", "kaisa",
    # Agreement and imperatives
    "accha", "achha", "theek", "sahi", "matlab", "samjha", "dekho",
    "suno", "bolo", "jao", "aao", "ruko", "chalo", "batao",
    # Praise and intensifiers
    "mast", "badiya", "zabardast", "bindaas", "dhamaka", "kamaal",
    "bilkul", "ekdum", "bahut", "bohot", "itna", "zyada",
    # Everyday nouns
    "ghar", "dost", "paisa", "kaam", "time", "baat", "cheez",
    "log", "aadmi", "ladka", "ladki", "bachha", "mama", "papa",
    "khana", "pani", "chai", "coffee", "movie", "gana", "game",
    "phone", "laptop", "net", "wifi", "app", "online",
    # Interjections and forms of address
    "arre", "arrey", "oye", "boss", "dude",
}
130
+
131
# Whole-word lookup table: lower-cased Roman spelling -> Devanagari.
# Transliteration is a dictionary lookup only; a word missing from this table
# is left in Roman script by the analyzer.
TRANSLITERATION_MAP = {
    # Possessives
    "mera": "मेरा", "tera": "तेरा", "uska": "उसका", "hamara": "हमारा",
    # Common nouns and feelings
    "naam": "नाम", "ghar": "घर", "dost": "दोस्त", "pyaar": "प्यार",
    "khushi": "खुशी", "dukh": "दुःख", "zindagi": "ज़िंदगी",
    # Address, question and copula words
    "yaar": "यार", "bhai": "भाई", "kya": "क्या", "hai": "है",
    "nahi": "नहीं", "haan": "हाँ", "accha": "अच्छा", "achha": "अच्छा",
    # Intensifiers and praise
    "bahut": "बहुत", "bohot": "बहुत", "mast": "मस्त",
    "badiya": "बढ़िया", "zabardast": "ज़बरदस्त", "kamaal": "कमाल",
    "theek": "ठीक", "sahi": "सही", "galat": "गलत",
    # Food and drink
    "khana": "खाना", "pani": "पानी", "chai": "चाय",
    # Time words
    "aaj": "आज", "kal": "कल", "abhi": "अभी",
    # Pronouns and particles
    "main": "मैं", "mein": "में", "toh": "तो",
    # Work, money and media
    "kaam": "काम", "paisa": "पैसा", "time": "टाइम",
    "phone": "फ़ोन", "movie": "मूवी", "gana": "गाना",
    # Body
    "dil": "दिल", "aankhein": "आँखें", "haath": "हाथ",
    # Emotions
    "gussa": "गुस्सा", "dar": "डर", "khauf": "ख़ौफ़",
    # Verb infinitives
    "hasna": "हँसना", "rona": "रोना", "bolna": "बोलना",
    "sunna": "सुनना", "dekhna": "देखना", "jaana": "जाना",
    "aana": "आना", "karna": "करना", "rehna": "रहना",
}
151
+
152
+
153
+ # DATACLASS
154
@dataclass
class HinglishAnalysis:
    """Container for everything a single text analysis produces.

    Instances are created by ``HinglishAnalyzer.analyze``; the module-level
    ``analyze`` helper exposes the same fields as a plain dict.
    """

    # Overall polarity label: "positive", "negative", "neutral" or "mixed".
    mood: str
    # Strength of the sentiment signal, capped at 1.0.
    intensity: float
    # Heuristic certainty of the mood label.
    confidence: float
    # Emoji that accompanies the mood label.
    emoji: str
    # The analyzer fills this with the same value as ``mood``.
    sentiment: str
    # Punctuation-delimited fragments of the input text.
    key_phrases: List[str]
    # Whether the sarcasm score crossed the analyzer's threshold.
    sarcasm: bool
    # The sarcasm score itself.
    sarcasm_confidence: float
    # Token ratios keyed "hinglish", "english" and "unknown".
    language_mix: Dict[str, float]
    # Coarse intent label: "praise", "complaint", "casual" or "mixed".
    category: str
    # One-sentence description of the text.
    summary: str
    # Emotion name -> score; ``{"neutral": 1.0}`` when no emotion cue matched.
    emotions: Dict[str, float]
    # Number of word tokens found in the text.
    word_count: int
    # Distinct sentiment-lexicon words that occurred in the text.
    positive_words_found: List[str]
    negative_words_found: List[str]
    # Text with known words replaced by their Devanagari spelling.
    transliteration: Optional[str] = None
172
+
173
+
174
+ # MAIN ANALYZER
175
class HinglishAnalyzer:
    """Lexicon- and regex-based analyzer for Hinglish (Roman Hindi + English) text.

    The class keeps no instance state: every method works only from its
    arguments and the module-level lexicons (``POSITIVE_WORDS``,
    ``NEGATIVE_WORDS``, ``INTENSIFIERS``, ``EMOTION_LEXICON``,
    ``SARCASM_PATTERNS``, ``HINGLISH_WORDS`` and ``TRANSLITERATION_MAP``).
    """

    # ── Language Detection ──────────────────
    def detect_language_mix(self, text: str) -> Dict[str, float]:
        """Estimate which share of the tokens is Hinglish, English or unknown.

        A token is Hinglish when it is in ``HINGLISH_WORDS``. Any other purely
        alphabetic token longer than two characters is assumed to be English
        (a heuristic, not a dictionary lookup). The rest is "unknown".

        Args:
            text: Text to inspect; it is lower-cased before tokenizing.

        Returns:
            Ratios under the keys "hinglish", "english" and "unknown", each
            rounded to two decimals. Text without any word token yields the
            fixed placeholder 0.5 / 0.5 / 0.0.
        """
        words = re.findall(r'\w+', text.lower())
        if not words:
            return {"hinglish": 0.5, "english": 0.5, "unknown": 0.0}

        hinglish_count = sum(1 for w in words if w in HINGLISH_WORDS)
        # Simple English heuristic: words NOT in the Hinglish set, length > 2.
        english_count = sum(
            1 for w in words
            if w not in HINGLISH_WORDS and len(w) > 2 and w.isalpha()
        )
        total = len(words)
        unknown = max(0, total - hinglish_count - english_count)

        return {
            "hinglish": round(hinglish_count / total, 2),
            "english": round(english_count / total, 2),
            "unknown": round(unknown / total, 2),
        }

    # ── Sarcasm Detection ───────────────────
    def detect_sarcasm(self, text: str) -> Dict[str, Any]:
        """Score how sarcastic *text* looks.

        Each entry of ``SARCASM_PATTERNS`` that matches the lower-cased text
        adds 0.3. A "contradiction" (at least one positive and one negative
        lexicon word in the same text) adds 0.2. The total is capped at 1.0.

        Returns:
            ``{"is_sarcastic": bool, "confidence": float}``. The flag is set
            only when the score is strictly above 0.3, i.e. for at least two
            pattern hits, or one hit combined with a contradiction.
        """
        text_lower = text.lower()
        matched = [p for p in SARCASM_PATTERNS if re.search(p, text_lower)]

        # Contradiction: positive word + negative context
        words = set(re.findall(r'\w+', text_lower))
        has_positive = bool(words & POSITIVE_WORDS)
        has_negative = bool(words & NEGATIVE_WORDS)
        contradiction = has_positive and has_negative

        score = len(matched) * 0.3 + (0.2 if contradiction else 0)
        score = min(1.0, score)
        return {"is_sarcastic": score > 0.3, "confidence": round(score, 2)}

    # ── Emotion Detection ───────────────────
    def detect_emotions(self, words: List[str]) -> Dict[str, float]:
        """Score each emotion by how many distinct cue words occur in *words*.

        Args:
            words: Lower-cased tokens of the text.

        Returns:
            Emotion name -> ``min(1.0, 0.35 * distinct matches)`` rounded to
            two decimals, in ``EMOTION_LEXICON`` order and only for emotions
            with at least one match; ``{"neutral": 1.0}`` when nothing matched.
        """
        word_set = set(words)
        scores = {}
        for emotion, lexicon in EMOTION_LEXICON.items():
            matches = len(word_set & lexicon)
            if matches:
                scores[emotion] = round(min(1.0, matches * 0.35), 2)
        return scores if scores else {"neutral": 1.0}

    # ── Confidence ──────────────────────────
    def calculate_confidence(self, pos: int, neg: int, total: int) -> float:
        """Map sentiment-word density to one of four fixed confidence tiers.

        Args:
            pos: Number of positive-lexicon hits.
            neg: Number of negative-lexicon hits.
            total: Number of tokens; values below 1 are treated as 1.

        Returns:
            0.92, 0.75, 0.60 or 0.45 for a density ``(pos + neg) / total``
            above 0.3, above 0.15, above 0.05, or at most 0.05.
        """
        signal = (pos + neg) / max(total, 1)
        if signal > 0.3:
            return 0.92
        elif signal > 0.15:
            return 0.75
        elif signal > 0.05:
            return 0.60
        return 0.45

    # ── Transliteration ─────────────────────
    @staticmethod
    def _split_punctuation(token: str):
        """Split *token* into (leading punctuation, core, trailing punctuation).

        The core runs from the first to the last word character. A token
        without any word character is returned whole as the core.
        """
        core = re.search(r'\w(?:.*\w)?', token)
        if core is None:
            return "", token, ""
        return token[:core.start()], core.group(), token[core.end():]

    def transliterate(self, text: str) -> str:
        """Replace every known Roman-script word with its Devanagari spelling.

        The text is split on whitespace and each chunk is looked up in
        ``TRANSLITERATION_MAP`` after lower-casing and removing non-word
        characters. Unknown chunks are kept unchanged. Punctuation around a
        converted word is kept ("hai!" becomes "है!"); previously it was
        dropped together with the Roman spelling.

        Returns:
            The converted chunks joined by single spaces.
        """
        converted = []
        for token in text.split():
            lead, core, trail = self._split_punctuation(token)
            key = re.sub(r'[^\w]', '', core.lower())
            mapped = TRANSLITERATION_MAP.get(key)
            if mapped is None:
                converted.append(token)
            else:
                converted.append(f"{lead}{mapped}{trail}")
        return " ".join(converted)

    # ── Key Phrases ─────────────────────────
    def extract_key_phrases(self, text: str) -> List[str]:
        """Return up to four punctuation-delimited fragments of *text*.

        The text is split on ``.``, ``!``, ``?`` and ``,``. Fragments of three
        characters or fewer (after stripping) are dropped, and each kept
        fragment is truncated to 80 characters.
        """
        sentences = re.split(r'[.!?,]', text)
        phrases = []
        for s in sentences:
            s = s.strip()
            if len(s) > 3:
                phrases.append(s[:80])
        return phrases[:4]

    # ── Summary ─────────────────────────────
    def generate_summary(self, text: str, mood: str, emotions: Dict) -> str:
        """Build a one-sentence description of the analysis.

        The message is called "short" when *text* has fewer than six
        whitespace-separated words and "detailed" otherwise. The primary
        emotion is the highest-scoring key of *emotions* (the first one on a
        tie), or "neutral" when *emotions* is empty.
        """
        top_emotion = max(emotions, key=emotions.get) if emotions else "neutral"
        length = "short" if len(text.split()) < 6 else "detailed"
        return (
            f"A {length} Hinglish message expressing {mood} sentiment "
            f"with primary emotion: {top_emotion}."
        )

    # ── Mood ────────────────────────────────
    @staticmethod
    def _classify_mood(pos_count: int, neg_count: int) -> tuple:
        """Return ``(mood, emoji, category)`` for the given lexicon hit counts.

        When both polarities are present, one side must lead by two or more
        hits to win outright; otherwise the text is "mixed". When only one
        polarity is present it decides the mood, so a single positive or
        negative word is enough (it used to be reported as neutral).
        """
        if pos_count and neg_count:
            if pos_count > neg_count + 1:
                return "positive", "😊", "praise"
            if neg_count > pos_count + 1:
                return "negative", "😠", "complaint"
            return "mixed", "🤨", "mixed"
        if pos_count:
            return "positive", "😊", "praise"
        if neg_count:
            return "negative", "😠", "complaint"
        return "neutral", "😐", "casual"

    @staticmethod
    def _unique_in_order(items: List[str]) -> List[str]:
        """Drop duplicates from *items*, keeping first-occurrence order."""
        return list(dict.fromkeys(items))

    # ── Main Analyze ────────────────────────
    def analyze(self, text: str) -> "HinglishAnalysis":
        """Run every analysis step on *text* and bundle the results.

        Args:
            text: Text to analyze. Empty or whitespace-only text yields the
                fixed neutral result.

        Returns:
            A ``HinglishAnalysis``. Intensity is
            ``0.20 * sentiment hits + 0.15 * intensifier hits + 0.01 * tokens``,
            capped at 1.0 and rounded to two decimals. The found-word lists
            hold each word once, in order of first appearance.
        """
        if not text or not text.strip():
            return self._neutral_result()

        text_lower = text.lower()
        words = re.findall(r'\w+', text_lower)

        pos_found = [w for w in words if w in POSITIVE_WORDS]
        neg_found = [w for w in words if w in NEGATIVE_WORDS]
        intens_count = sum(1 for w in words if w in INTENSIFIERS)

        pos_count = len(pos_found)
        neg_count = len(neg_found)

        # Intensity
        intensity = min(
            1.0,
            (pos_count + neg_count) * 0.20
            + intens_count * 0.15
            + len(words) * 0.01
        )
        intensity = round(intensity, 2)

        # Mood & category
        mood, emoji, category = self._classify_mood(pos_count, neg_count)

        sarcasm_result = self.detect_sarcasm(text)
        emotions = self.detect_emotions(words)
        lang_mix = self.detect_language_mix(text)
        confidence = self.calculate_confidence(pos_count, neg_count, len(words))
        key_phrases = self.extract_key_phrases(text)
        summary = self.generate_summary(text, mood, emotions)
        transliteration = self.transliterate(text)

        return HinglishAnalysis(
            mood=mood,
            intensity=intensity,
            confidence=confidence,
            emoji=emoji,
            sentiment=mood,
            key_phrases=key_phrases,
            sarcasm=sarcasm_result["is_sarcastic"],
            sarcasm_confidence=sarcasm_result["confidence"],
            language_mix=lang_mix,
            category=category,
            summary=summary,
            emotions=emotions,
            word_count=len(words),
            # Ordered de-duplication: list(set(...)) gave a different order
            # from run to run because string hashing is randomized.
            positive_words_found=self._unique_in_order(pos_found),
            negative_words_found=self._unique_in_order(neg_found),
            transliteration=transliteration,
        )

    def _neutral_result(self) -> "HinglishAnalysis":
        """Return the fixed result used for empty or whitespace-only input."""
        return HinglishAnalysis(
            mood="neutral", intensity=0.0, confidence=0.5, emoji="😐",
            sentiment="neutral", key_phrases=[], sarcasm=False,
            sarcasm_confidence=0.0,
            language_mix={"hinglish": 0.5, "english": 0.5, "unknown": 0.0},
            category="casual", summary="Empty or neutral text.",
            emotions={"neutral": 1.0}, word_count=0,
            positive_words_found=[], negative_words_found=[],
            transliteration="",
        )
334
+
335
+
336
+ # PUBLIC API
337
# One shared analyzer instance backs all module-level helper functions.
_analyzer = HinglishAnalyzer()


def analyze(text: str) -> Dict[str, Any]:
    """Analyze one Hinglish text and return the result as a plain dict.

    The dict is the attribute mapping of the ``HinglishAnalysis`` produced by
    the shared analyzer, so its keys are that dataclass's field names.
    """
    analysis = _analyzer.analyze(text)
    return vars(analysis)
343
+
344
+
345
def analyze_batch(texts: List[str]) -> List[Dict[str, Any]]:
    """Run ``analyze`` on every text and return the results in input order."""
    return list(map(analyze, texts))
348
+
349
+
350
def transliterate(text: str) -> str:
    """Return *text* with known Roman Hinglish words written in Devanagari.

    Delegates to the shared analyzer's ``transliterate`` method.
    """
    devanagari = _analyzer.transliterate(text)
    return devanagari
353
+
354
+
355
def detect_language(text: str) -> Dict[str, float]:
    """Return the Hinglish / English / unknown token ratios of *text*.

    Delegates to the shared analyzer's ``detect_language_mix`` method.
    """
    ratios = _analyzer.detect_language_mix(text)
    return ratios
358
+
359
+
360
def detect_emotion(text: str) -> Dict[str, float]:
    """Return emotion scores for *text*.

    The text is lower-cased and split into word tokens, which are then scored
    by the shared analyzer's ``detect_emotions`` method.
    """
    return _analyzer.detect_emotions(re.findall(r'\w+', text.lower()))
364
+
365
+
366
def is_sarcastic(text: str) -> Dict[str, Any]:
    """Return the sarcasm verdict for *text*.

    Delegates to the shared analyzer's ``detect_sarcasm`` method, whose result
    carries the keys "is_sarcastic" and "confidence".
    """
    verdict = _analyzer.detect_sarcasm(text)
    return verdict
@@ -0,0 +1,178 @@
1
+ Metadata-Version: 2.4
2
+ Name: hinglish-nlp
3
+ Version: 0.2.0
4
+ Summary: Hinglish (Roman Hindi + English) NLP toolkit - Sentiment, Emotion, Sarcasm & more
5
+ Author-email: Lalit <official.lalitpal08@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/Lalit2206/hinglish-nlp
8
+ Project-URL: Repository, https://github.com/Lalit2206/hinglish-nlp
9
+ Keywords: hinglish,nlp,sentiment,hindi,roman-hindi,text-analysis
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Operating System :: OS Independent
13
+ Classifier: Topic :: Text Processing :: Linguistic
14
+ Classifier: Natural Language :: Hindi
15
+ Requires-Python: >=3.9
16
+ Description-Content-Type: text/markdown
17
+ Requires-Dist: pydantic>=2.0
18
+
19
+ # hinglish-nlp 🇮🇳
20
+
21
+ A powerful NLP toolkit for **Hinglish** (Roman Hindi + English) text analysis.
22
+
23
+ [![PyPI version](https://badge.fury.io/py/hinglish-nlp.svg)](https://badge.fury.io/py/hinglish-nlp)
24
+ [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
25
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
26
+
27
+ ---
28
+
29
+ ## Installation
30
+
31
+ ```bash
32
+ pip install hinglish-nlp
33
+ ```
34
+
35
+ ---
36
+
37
+ ## Features
38
+
39
+ | Feature | Description |
40
+ |--------|-------------|
41
+ | ✅ Sentiment Analysis | Positive / Negative / Neutral / Mixed |
42
+ | ✅ Emotion Detection | Joy, Anger, Sadness, Fear, Surprise, Disgust |
43
+ | ✅ Sarcasm Detection | Pattern + contradiction based |
44
+ | ✅ Language Mix Detection | Hinglish vs English ratio |
45
+ | ✅ Transliteration | Roman Hindi → Devanagari |
46
+ | ✅ Batch Processing | Multiple texts at once |
47
+ | ✅ Key Phrase Extraction | Important phrases from text |
48
+ | ✅ Intensity Score | 0.0 to 1.0 scale |
49
+ | ✅ Confidence Score | How sure the model is |
50
+
51
+ ---
52
+
53
+ ## Usage
54
+
55
+ ### Basic Sentiment Analysis
56
+ ```python
57
+ from hinglish import analyze
58
+
59
+ result = analyze("yaar bahut mast movie thi!")
60
+ print(result["mood"]) # positive
61
+ print(result["emoji"]) # 😊
62
+ print(result["intensity"]) # 0.4
63
+ print(result["confidence"]) # 0.75
64
+ ```
65
+
66
+ ### Emotion Detection
67
+ ```python
68
+ from hinglish import detect_emotion
69
+
70
+ emotions = detect_emotion("mujhe bahut gussa aa raha hai!")
71
+ print(emotions) # {'anger': 0.35}
72
+
73
+ emotions = detect_emotion("aaj bahut khushi hui yaar!")
74
+ print(emotions) # {'joy': 0.35}
75
+ ```
76
+
77
+ ### Sarcasm Detection
78
+ ```python
79
+ from hinglish import is_sarcastic
80
+
81
+ result = is_sarcastic("haan bilkul, bahut accha hai na!!")
82
+ print(result)
83
+ # {'is_sarcastic': True, 'confidence': 0.9}
84
+ ```
85
+
86
+ ### Language Mix Detection
87
+ ```python
88
+ from hinglish import detect_language
89
+
90
+ mix = detect_language("yaar ye movie bahut boring thi")
91
+ print(mix)
92
+ # {'hinglish': 0.5, 'english': 0.33, 'unknown': 0.17}
93
+ ```
94
+
95
+ ### Transliteration (Roman → Devanagari)
96
+ ```python
97
+ from hinglish import transliterate
98
+
99
+ text = transliterate("mera naam lalit hai")
100
+ print(text) # मेरा नाम lalit है ("lalit" is not in the built-in word map, so it stays in Roman script)
101
+ ```
102
+
103
+ ### Batch Processing
104
+ ```python
105
+ from hinglish import analyze_batch
106
+
107
+ texts = [
108
+ "yaar mast movie thi!",
109
+ "bilkul bakwaas tha yaar",
110
+ "theek thak tha, kuch khaas nahi"
111
+ ]
112
+
113
+ results = analyze_batch(texts)
114
+ for r in results:
115
+ print(r["mood"], r["emoji"])
116
+ # positive 😊
117
+ # negative 😠
118
+ # neutral 😐
119
+ ```
120
+
121
+ ### Full Analysis
122
+ ```python
123
+ from hinglish import analyze
124
+
125
+ result = analyze("Phone ki battery toh bekar hai but camera mast hai")
126
+ print(result)
127
+ # {
128
+ # 'mood': 'mixed',
129
+ # 'intensity': 0.5,
130
+ # 'confidence': 0.75,
131
+ # 'emoji': '🤨',
132
+ # 'sentiment': 'mixed',
133
+ # 'key_phrases': ['Phone ki battery toh bekar hai but camera mast hai'],
134
+ # 'sarcasm': False,
135
+ # 'sarcasm_confidence': 0.2,
136
+ # 'language_mix': {'hinglish': 0.5, 'english': 0.4, 'unknown': 0.1},
137
+ # 'category': 'mixed',
138
+ # 'summary': 'A detailed Hinglish message expressing mixed sentiment...',
139
+ # 'emotions': {'joy': 0.35},
140
+ # 'word_count': 10,
141
+ # 'positive_words_found': ['mast'],
142
+ # 'negative_words_found': ['bekar'],
143
+ # 'transliteration': 'फ़ोन ki battery तो bekar है but camera मस्त है'
144
+ # }
145
+ ```
146
+
147
+ ---
148
+
149
+ ## Output Fields
150
+
151
+ | Field | Type | Description |
152
+ |-------|------|-------------|
153
+ | `mood` | str | positive / negative / neutral / mixed |
154
+ | `intensity` | float | 0.0 – 1.0 |
155
+ | `confidence` | float | 0.0 – 1.0 |
156
+ | `emoji` | str | Visual mood indicator |
157
+ | `sentiment` | str | Same as mood |
158
+ | `key_phrases` | list | Important phrases |
159
+ | `sarcasm` | bool | Is text sarcastic? |
160
+ | `sarcasm_confidence` | float | Sarcasm confidence score |
161
+ | `language_mix` | dict | hinglish / english / unknown ratio |
162
+ | `category` | str | praise / complaint / casual / mixed |
163
+ | `summary` | str | Short summary of the text |
164
+ | `emotions` | dict | Detected emotions with scores |
165
+ | `word_count` | int | Total word count |
166
+ | `positive_words_found` | list | Positive words detected |
167
+ | `negative_words_found` | list | Negative words detected |
168
+ | `transliteration` | str | Roman → Devanagari |
169
+
170
+ ---
171
+
172
+ ## Author
173
+
174
+ **Lalit** — [lalitpal2206](https://pypi.org/user/lalitpal2206/)
175
+
176
+ ## License
177
+
178
+ MIT License
@@ -0,0 +1,9 @@
1
+ README.md
2
+ pyproject.toml
3
+ hinglish/__init__.py
4
+ hinglish/analyzer.py
5
+ hinglish_nlp.egg-info/PKG-INFO
6
+ hinglish_nlp.egg-info/SOURCES.txt
7
+ hinglish_nlp.egg-info/dependency_links.txt
8
+ hinglish_nlp.egg-info/requires.txt
9
+ hinglish_nlp.egg-info/top_level.txt
@@ -0,0 +1 @@
1
+ pydantic>=2.0
@@ -0,0 +1,2 @@
1
+ dist
2
+ hinglish
@@ -0,0 +1,28 @@
1
+ [project]
2
+ name = "hinglish-nlp"
3
+ version = "0.2.0"
4
+ description = "Hinglish (Roman Hindi + English) NLP toolkit - Sentiment, Emotion, Sarcasm & more"
5
+ readme = "README.md"
6
+ requires-python = ">=3.9"
7
+ license = {text = "MIT"}
8
+ authors = [
9
+ {name = "Lalit", email = "official.lalitpal08@gmail.com"}]
10
+ keywords = ["hinglish", "nlp", "sentiment", "hindi", "roman-hindi", "text-analysis"]
11
+ classifiers = [
12
+ "Programming Language :: Python :: 3",
13
+ "License :: OSI Approved :: MIT License",
14
+ "Operating System :: OS Independent",
15
+ "Topic :: Text Processing :: Linguistic",
16
+ "Natural Language :: Hindi",
17
+ ]
18
+
19
+ dependencies = [
20
+ "pydantic>=2.0",
21
+ ]
22
+
23
+ [project.urls]
24
+ Homepage = "https://github.com/Lalit2206/hinglish-nlp"
25
+ Repository = "https://github.com/Lalit2206/hinglish-nlp"
26
+
27
[tool.setuptools.packages.find]
# Search the project root, but only ship the real package. Without an
# `include` filter, discovery also picked up the local `dist` build directory
# as a package (it was listed in top_level.txt next to `hinglish`).
where = ["."]
include = ["hinglish*"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+